/*******************************************************************************
* Copyright 2006 - 2012 Vienna University of Technology,
* Department of Software Technology and Interactive Systems, IFS
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, softwareBecker
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
******************************************************************************/
package eu.scape_project.planning.xml;
import java.io.BufferedOutputStream;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.Serializable;
import java.io.Writer;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;
import javax.inject.Inject;
import javax.persistence.EntityManager;
import javax.xml.stream.XMLOutputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamWriter;
import javax.xml.transform.Result;
import javax.xml.transform.Source;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;
import org.apache.commons.codec.binary.Base64InputStream;
import org.apache.commons.io.IOUtils;
import org.apache.commons.io.output.WriterOutputStream;
import org.dom4j.Document;
import org.dom4j.Element;
import org.dom4j.XPath;
import org.dom4j.io.DocumentSource;
import org.slf4j.Logger;
import eu.scape_project.planning.exception.PlanningException;
import eu.scape_project.planning.manager.DigitalObjectManager;
import eu.scape_project.planning.manager.StorageException;
import eu.scape_project.planning.model.DigitalObject;
import eu.scape_project.planning.model.Plan;
import eu.scape_project.planning.model.PlanProperties;
import eu.scape_project.planning.utils.FileUtils;
import eu.scape_project.planning.utils.OS;
/**
* This class provides methods to export plans from the database to their XML representation.
*
* @author Christoph Becker
*/
public class ProjectExportAction implements Serializable {
private static final long serialVersionUID = 2155152208617526555L;
/**
* Boundary of data to load before calling the garbage collector.
*/
private static final int LOADED_DATA_SIZE_BOUNDARY = 200 * 1024 * 1024;
@Inject
private Logger log;
@Inject
private EntityManager em;
@Inject
protected DigitalObjectManager digitalObjectManager;
private String lastProjectExportPath;
public ProjectExportAction() {
lastProjectExportPath = null;
}
/**
* Exports all projects into separate xml files and adds them to a zip
* archive.
*
* @return True if export was successful, false otherwise.
*/
public boolean exportAllProjectsToZip() {
@SuppressWarnings("unchecked")
List<PlanProperties> ppList = em.createQuery("select p from PlanProperties p order by p.id").getResultList();
return exportPPListToZip(ppList);
}
/**
* Exports all plans where the {@link PlanProperties#getId()} is in the given range [fromID, toID] (inclusive)
* and adds them to a zip archive
*
* @param fromID
* from-ID in table PlanProperties, which is used to filter
* PlanProperties
* @param toID
* to-ID in table PlanProperties, which is used to filter
* PlanProperties
* @return True if export was successful, false otherwise.
*/
public boolean exportSomeProjectsToZip(int fromID, int toID) {
@SuppressWarnings("unchecked")
List<PlanProperties> ppList = em.createQuery(
"select p.planProperties from Plan p where " + " p.planProperties.id >= :fromID "
+ " and p.planProperties.id <= :toID order by p.planProperties.id")
.setParameter("fromID", fromID)
.setParameter("toID", toID)
.getResultList();
return exportPPListToZip(ppList);
}
/**
* Exports the project identified by PlanProperties.Id ppid and writes the
* document to the given OutputStream - including all binary data.
* (currently required by {@link #exportAllProjectsToZip()} ) - Does NOT
* clean up temp files written to baseTempPath
*
* @param ppid
* @param out
* @param baseTempPath
* used to write temp files for binary data, must not be used by
* other exports at the same time
* @return True if export was successful, false otherwise.
*/
public boolean exportComplete(int ppid, OutputStream out, String baseTempPath) {
ProjectExporter exporter = new ProjectExporter();
Document doc = exporter.createProjectDoc();
Plan plan = null;
try {
plan = em.createQuery("select p from Plan p where p.planProperties.id = :ppid ", Plan.class)
.setParameter("ppid", ppid)
.getSingleResult();
} catch (Exception e) {
log.error("Could not load planProperties: ", e);
log.debug("Skipping the export of the plan with properties " + ppid + ": Couldnt load.");
return false;
}
try {
String tempPath = baseTempPath;
File tempDir = new File(tempPath);
tempDir.mkdirs();
try {
exporter.addProject(plan, doc, false);
// Perform XSLT transformation to get the DATA into the PLANS
// Prepare base 64 encoded binary data
List<Integer> binaryObjectIds = getBinaryObjectIds(doc);
writeBinaryObjects(binaryObjectIds, tempPath);
// Prepare preservation action plan
List<Integer> preservationActionPlanIDs = getPreservationActionPlanIds(doc);
writeDigitalObjects(preservationActionPlanIDs, tempPath);
// Call XSLT
addBinaryData(doc, out, tempPath);
} catch (IOException e) {
log.error("Could not open outputstream.", e);
return false;
} catch (TransformerException e) {
log.error("failed to generate export file.", e);
return false;
} catch (StorageException e) {
log.error("Could not load object from stoarge.", e);
return false;
} catch (PlanningException e) {
log.error("Could not export plan.", e);
return false;
}
} finally {
// Clean up
plan = null;
em.clear();
System.gc();
}
return true;
}
/**
* Returns a list of object IDs that are stored in the document without
* binary data.
*
* @param doc
* the document to search
* @return a list of IDs
*/
private List<Integer> getBinaryObjectIds(Document doc) {
// Get data elements that have data and a number as content
XPath xpath = doc.createXPath("//plato:data[@hasData='true' and number(.) = number(.)]");
Map<String, String> namespaceMap = new HashMap<String, String>();
namespaceMap.put("plato", PlanXMLConstants.PLATO_NS);
xpath.setNamespaceURIs(namespaceMap);
@SuppressWarnings("unchecked")
List<Element> elements = xpath.selectNodes(doc);
List<Integer> objectIds = new ArrayList<Integer>(elements.size());
for (Element element : elements) {
objectIds.add(Integer.parseInt(element.getStringValue()));
}
return objectIds;
}
/**
* Returns the collection profile IDs that are in the document without data.
*
* @param doc
* the docuemnt to seasrch
* @return a list of IDs
*/
private List<Integer> getPreservationActionPlanIds(Document doc) {
// Get data elements that have data and a number as content
XPath xpath = doc.createXPath("//plato:preservationActionPlan[number(.) = number(.)]");
Map<String, String> namespaceMap = new HashMap<String, String>();
namespaceMap.put("plato", PlanXMLConstants.PLATO_NS);
xpath.setNamespaceURIs(namespaceMap);
@SuppressWarnings("unchecked")
List<Element> elements = xpath.selectNodes(doc);
List<Integer> objectIds = new ArrayList<Integer>(elements.size());
for (Element element : elements) {
objectIds.add(Integer.parseInt(element.getStringValue()));
}
return objectIds;
}
/**
* Writes the digital objects of the provided objectIds to the tempDir as
* files.
*
* @param objectIds
* the IDs of the objects to write
* @param tempDir
* a temporary directory where the files will be written
* @throws IOException
* if an error occurred during write
* @throws StorageException
* if the objects could not be loaded
*/
private void writeDigitalObjects(List<Integer> objectIds, String tempDir) throws IOException, StorageException {
int counter = 0;
int skip = 0;
log.info("Writing bytestreams of digital objects. Size = " + objectIds.size());
for (Integer id : objectIds) {
if (counter > LOADED_DATA_SIZE_BOUNDARY) { // Call GC if unused data
// exceeds boundary
System.gc();
counter = 0;
}
DigitalObject object = em.find(DigitalObject.class, id);
if (object.isDataExistent()) {
counter += object.getData().getSize();
File f = new File(tempDir + object.getId() + ".xml");
DigitalObject dataFilledObject = digitalObjectManager.getCopyOfDataFilledDigitalObject(object);
FileOutputStream out = new FileOutputStream(f);
try {
out.write(dataFilledObject.getData().getData());
} finally {
out.close();
}
dataFilledObject = null;
} else {
skip++;
}
object = null;
}
em.clear();
System.gc();
log.info("Finished writing bytestreams of digital objects. Skipped empty objects: " + skip);
}
/**
* new helper method that was refactored from
* {@link #exportAllProjectsToZip()} It takes a list of
* {@link PlanProperties} and exports it to a zip file.
*
* @param ppList
* {@link PlanProperties} for plans to export
*
* @return True if export was successful, false otherwise.
*/
private boolean exportPPListToZip(List<PlanProperties> ppList) {
if (!ppList.isEmpty()) {
log.debug("number of plans to export: " + ppList.size());
String filename = "allprojects.zip";
lastProjectExportPath = OS.getTmpPath() + "export" + System.currentTimeMillis() + "/";
new File(lastProjectExportPath).mkdirs();
String binarydataTempPath = lastProjectExportPath + "binarydata/";
File binarydataTempDir = new File(binarydataTempPath);
binarydataTempDir.mkdirs();
try {
OutputStream out = new BufferedOutputStream(new FileOutputStream(lastProjectExportPath + filename));
ZipOutputStream zipOut = new ZipOutputStream(out);
for (PlanProperties pp : ppList) {
log.debug("EXPORTING: " + pp.getName());
ZipEntry zipAdd = new ZipEntry(String.format("%1$03d", pp.getId()) + "-"
+ FileUtils.makeFilename(pp.getName()) + ".xml");
zipOut.putNextEntry(zipAdd);
// export the complete project, including binary data
exportComplete(pp.getId(), zipOut, binarydataTempPath);
zipOut.closeEntry();
}
zipOut.close();
out.close();
new File(lastProjectExportPath + "finished.info").createNewFile();
// FacesMessages.instance().add(FacesMessage.SEVERITY_INFO,
// "Export was written to: " + exportPath);
log.info("Export was written to: " + lastProjectExportPath);
} catch (IOException e) {
// FacesMessages.instance().add(FacesMessage.SEVERITY_ERROR,
// "An error occured while generating the export file.");
log.error("An error occured while generating the export file.", e);
File errorInfo = new File(lastProjectExportPath + "error.info");
try {
Writer w = new FileWriter(errorInfo);
w.write("An error occured while generating the export file:");
w.write(e.getMessage());
w.close();
} catch (IOException e1) {
log.error("Could not write error file.");
}
return false;
} finally {
// remove all binary temp files
OS.deleteDirectory(binarydataTempDir);
}
}
return true;
}
/**
* Performs XSLT transformation to get the data into the plans.
*
* @param doc
* the plan document
* @param out
* output stream to write the transformed plan XML
* @param tempDir
* temporary directory where the data files are located
* @throws TransformerException
* if an error occured during transformation
*/
private void addBinaryData(Document doc, OutputStream out, String tempDir) throws TransformerException {
InputStream xsl = Thread.currentThread().getContextClassLoader()
.getResourceAsStream("data/xslt/bytestreams.xsl");
TransformerFactory transformerFactory = TransformerFactory.newInstance();
Transformer transformer = transformerFactory.newTransformer(new StreamSource(xsl));
transformer.setParameter("tempDir", tempDir);
Source xmlSource = new DocumentSource(doc);
Result outputTarget = new StreamResult(out); // new
// FileWriter(outFile));
log.debug("starting bytestream transformation ...");
transformer.transform(xmlSource, outputTarget);
log.debug("FINISHED bytestream transformation!");
}
/**
* Loads all binary data for the given digital objects and dumps it to XML
* files, located in tempDir.
*
* @param objectIds
* @param tempDir
* @param encoder
* @throws IOException
* @throws StorageException
*/
private void writeBinaryObjects(List<Integer> objectIds, String aTempDir)
throws IOException, StorageException {
int counter = 0;
int skip = 0;
log.info("writing XMLs for bytestreams of digital objects. count = " + objectIds.size());
for (Integer id : objectIds) {
if (counter > LOADED_DATA_SIZE_BOUNDARY) { // Call GC if unused data
// exceeds boundary
System.gc();
counter = 0;
}
DigitalObject object = em.find(DigitalObject.class, id);
if (object.isDataExistent()) {
counter += object.getData().getSize();
File f = new File(aTempDir + object.getId() + ".xml");
DigitalObject dataFilledObject = null;
dataFilledObject = digitalObjectManager.getCopyOfDataFilledDigitalObject(object);
writeBinaryData(id, new ByteArrayInputStream(dataFilledObject.getData().getData()), f);
dataFilledObject = null;
} else {
skip++;
}
object = null;
}
em.clear();
System.gc();
log.info("Finished writing bytestreams of digital objects. Skipped empty objects: " + skip);
}
/**
* Dumps binary data to provided file. It results in an XML file with a
* single element: data.
*
* @param id
* @param data
* @param f
* @param encoder
* @throws IOException
*/
private static void writeBinaryData(int id, InputStream data, File f) throws IOException {
XMLOutputFactory factory = XMLOutputFactory.newInstance();
try {
XMLStreamWriter writer = factory.createXMLStreamWriter(new FileWriter(f));
writer.writeStartDocument(PlanXMLConstants.ENCODING,"1.0");
writer.writeStartElement("data");
writer.writeAttribute("id", "" + id);
Base64InputStream base64EncodingIn = new Base64InputStream( data, true, PlanXMLConstants.BASE64_LINE_LENGTH, PlanXMLConstants.BASE64_LINE_BREAK);
OutputStream out = new WriterOutputStream(new XMLStreamContentWriter(writer) , PlanXMLConstants.ENCODING);
// read the binary data and encode it on the fly
IOUtils.copy(base64EncodingIn, out);
out.flush();
// all data is written - end
writer.writeEndElement();
writer.writeEndDocument();
writer.flush();
writer.close();
} catch (XMLStreamException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
}
// -------- getter/setter --------
public String getLastProjectExportPath() {
return lastProjectExportPath;
}
public void setLastProjectExportPath(String lastProjectExportPath) {
this.lastProjectExportPath = lastProjectExportPath;
}
// /**
// * Adds all enlisted plans to an XML document, but does NOT write binary
// data.
// * Instead the Id's of all referenced uploads and sample records are added
// to the provided lists,
// * this way they can be added later.
// *
// * @param ppids
// * @param uploadIDs
// * @param recordIDs
// * @return
// */
// public Document exportToXml(List<Integer> ppids, List<Integer> uploadIDs,
// List<Integer> recordIDs) {
// ProjectExporter exporter = new ProjectExporter();
// Document doc = exporter.createProjectDoc();
//
// int i = 0;
// for (Integer id: ppids) {
// // load one plan after the other:
// List<Plan> list = em.createQuery(
// "select p from Plan p where p.planProperties.id = "
// + id).getResultList();
// if (list.size() != 1) {
// FacesMessages.instance().add(FacesMessage.SEVERITY_ERROR,
// "Skipping the export of the plan with properties"+id+": Couldnt load.");
// } else {
// //log.debug("adding project "+p.getplanProperties().getName()+" to XML...");
// exporter.addProject(list.get(0), doc, uploadIDs, recordIDs);
// }
// list.clear();
// list = null;
//
// log.info("XMLExport: addString destinationed project ppid="+id);
// i++;
// if ((i%10==0)) {
// em.clear();
// System.gc();
// }
// }
// return doc;
// }
}