/*
* -------------------------------------------------------------------------
* Copyright 2014
* Centre for Information Modeling - Austrian Centre for Digital Humanities
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License
* -------------------------------------------------------------------------
*/
package org.emile.cirilo.dialog;
import org.emile.cirilo.*;
import org.emile.cirilo.business.MDMapper;
import org.emile.cirilo.business.Session;
import org.emile.cirilo.business.EDM;
import org.emile.cirilo.ecm.templates.*;
import org.emile.cirilo.ecm.repository.*;
import org.emile.cirilo.gui.jtable.HarvesterTableModel;
import org.emile.cirilo.oai.*;
import org.emile.cirilo.utils.ImageTools;
import org.geonames.WebService;
import org.apache.log4j.Logger;
import voodoosoft.jroots.application.*;
import voodoosoft.jroots.core.CPropertyService;
import voodoosoft.jroots.core.CServiceProvider;
import voodoosoft.jroots.dialog.*;
import voodoosoft.jroots.exception.CException;
import java.awt.Color;
import java.awt.Cursor;
import java.awt.event.ActionEvent;
import java.awt.image.BufferedImage;
import java.io.FileWriter;
import java.io.File;
import java.io.StringReader;
import java.text.MessageFormat;
import java.util.Iterator;
import java.util.List;
import java.util.ResourceBundle;
import java.util.ArrayList;
import java.net.URL;
import java.net.URLConnection;
import java.io.*;
import javax.imageio.ImageIO;
import javax.swing.JOptionPane;
import javax.swing.JTable;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.stream.StreamSource;
import javax.xml.transform.Transformer;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.jdom.input.SAXBuilder;
import org.jdom.output.Format;
import org.jdom.output.XMLOutputter;
import org.jdom.transform.JDOMResult;
import org.jdom.transform.JDOMSource;
import org.jdom.xpath.XPath;
import org.jdom.Document;
import org.jdom.Element;
import org.jdom.Attribute;
import com.asprise.util.ui.progress.ProgressDialog;
/**
 * Dialog that harvests metadata from the data providers listed in the
 * DATAPROVIDERS datastream of the cirilo:Backbone object and ingests the
 * harvested records into the repository.
 *
 * Three kinds of sources are distinguished by the prefix of their base URL:
 * "http" (harvested through {@code ListRecords}), "file:///" (a local XML
 * file) and "phaidra:///" (a collection fetched over HTTP).
 */
public class HarvesterDialog extends CDefaultDialog {
// log4j logger shared by all instances of this dialog
private static Logger log = Logger.getLogger(HarvesterDialog.class);
/**
* Constructor for the HarvesterDialog object. It performs no work; the
* fields used by the dialog are initialised in opened().
*/
public HarvesterDialog() { }
/**
 * Returns the access context of this dialog.
 *
 * @return always {@code null}; this dialog does not create an access context
 */
public IAccessContext getAccessContext() {
    // No context is ever built here, so the result is a typed null.
    final CDefaultAccessContext noContext = null;
    return noContext;
}
/**
 * Handler of the close button: stores the dialog layout together with the
 * state of the repositories table, then closes the dialog.
 *
 * @param e the triggering action event (not used)
 * @exception Exception if saving the layout or closing the dialog fails
 */
public void handleCloseButton(ActionEvent e)
        throws Exception {
    final JTable repositoriesTable = (JTable) getGuiComposite().getWidget("jtRepositories");
    // Persist the layout first so it is saved even though the dialog goes away next.
    org.emile.cirilo.dialog.CBoundSerializer.save(
            getCoreDialog(), se.getHarvesterDialogProperties(), repositoriesTable);
    close();
}
/**
 * Handler of the start button: harvests all repositories selected in the
 * table on a background thread.
 *
 * The run proceeds as follows: the log directory is verified, the user is
 * asked for confirmation, a log file "harvest-&lt;timestamp&gt;.log" is created
 * in the log directory, and every selected repository is fetched with
 * {@link #harvest}, {@link #collect} or {@link #collectfromPhaidra}
 * (chosen by the prefix of its base URL) and ingested with
 * {@link #addItems}. Afterwards the "updated" time stamps are written back
 * to the DATAPROVIDERS datastream and the log file button is enabled.
 *
 * The log file is only created once the user has confirmed the run, and it
 * is closed on every exit path, including failures.
 *
 * @param e the triggering action event (not used)
 * @exception Exception declared for the listener contract; failures inside
 *            the worker thread are logged, not propagated
 */
public void handleStartButton(ActionEvent e) throws Exception {
    new Thread() {
        public void run() {
            // Log writer opened by THIS run; closed in finally whatever happens.
            FileWriter runLog = null;
            try {
                JTable tb = (JTable) getGuiComposite().getWidget("jtRepositories");
                int[] selected = tb.getSelectedRows();
                edm = new EDM(user);

                // A missing (or never configured) log directory aborts the run.
                if (logdir == null || !new File(logdir).exists()) {
                    Object[] arg = {logdir};
                    MessageFormat noLogDir = new MessageFormat(res.getString("nologdir"));
                    JOptionPane.showMessageDialog(null, noLogDir.format(arg), Common.WINDOW_HEADER, JOptionPane.ERROR_MESSAGE);
                    return;
                }

                MessageFormat question = new MessageFormat(res.getString("askharv"));
                Object[] args = {Integer.toString(selected.length)};
                int liChoice = JOptionPane.showConfirmDialog(null, question.format(args), Common.WINDOW_HEADER, JOptionPane.YES_NO_OPTION, JOptionPane.QUESTION_MESSAGE);
                if (liChoice != JOptionPane.YES_OPTION) {
                    return;
                }

                // The time stamp doubles as the new "updated" value and, with blanks
                // and colons replaced, as part of the log file name.
                String time = new java.sql.Timestamp(System.currentTimeMillis()).toString();
                logfile = logdir + File.separator + "harvest-" + time.replace(' ', '_').replace(':', '.') + ".log";
                runLog = new FileWriter(logfile);
                logger = runLog;

                runLog.write(new java.util.Date() + res.getString("start") + " harvesting" + "\n");
                getCoreDialog().setCursor(Cursor.getPredefinedCursor(Cursor.WAIT_CURSOR));

                for (int i = 0; i < selected.length; i++) {
                    int x = selected[i];
                    String baseURL = (String) tb.getValueAt(x, 2);
                    String metadataPrefix = (String) tb.getValueAt(x, 3);
                    boolean aborted = false;
                    try {
                        String[] row = ((HarvesterTableModel) tb.getModel()).getRow(x);
                        boolean fetched = false;
                        // The three prefixes are mutually exclusive.
                        if (baseURL.startsWith("http")) {
                            fetched = harvest(metadataPrefix, baseURL, row[6], null, null);
                        } else if (baseURL.startsWith("file:///")) {
                            fetched = collect(metadataPrefix, baseURL, null, null);
                        } else if (baseURL.startsWith("phaidra:///")) {
                            fetched = collectfromPhaidra(metadataPrefix, baseURL, null, null);
                        }
                        // addItems returns false when the whole run has to stop.
                        if (fetched && !addItems(row)) {
                            aborted = true;
                        }
                    } catch (Exception ex) {
                        log.error(ex.getLocalizedMessage(), ex);
                    }
                    if (aborted) {
                        break;
                    }
                    XPath xPath = XPath.newInstance("/dataproviders/repository[serviceprovider='" + baseURL + "']");
                    Element rep = (Element) xPath.selectSingleNode(doc);
                    if (rep != null) {
                        rep.getChild("updated").setText(time);
                    }
                    runLog.write("\n");
                }

                Repository.modifyDatastreamByValue("cirilo:Backbone", "DATAPROVIDERS", "text/xml", outputter.outputString(doc));
                edm.save();
                JOptionPane.showMessageDialog(getCoreDialog(), res.getString("details") + logfile, Common.WINDOW_HEADER, JOptionPane.INFORMATION_MESSAGE);
                runLog.write("\n" + new java.util.Date() + res.getString("end") + " harvesting");
                runLog.close();
                getGuiComposite().getWidget("jbShowLogfile").setEnabled(true);
            } catch (Exception ex) {
                log.error(ex.getLocalizedMessage(), ex);
            } finally {
                // Closing an already closed writer has no effect, so this is safe
                // after the regular close above.
                if (runLog != null) {
                    try {
                        runLog.close();
                    } catch (IOException closeEx) {
                        log.error(closeEx.getLocalizedMessage(), closeEx);
                    }
                }
                getCoreDialog().setCursor(Cursor.getPredefinedCursor(Cursor.DEFAULT_CURSOR));
            }
        }
    }.start();
}
/**
 * Fetches a collection description addressed by a "phaidra:///" URL and
 * stores it in {@code metadata}.
 *
 * The part of {@code baseURL} after the 11-character "phaidra:///" prefix is
 * requested over HTTP. For every {@code rel:hasCollectionMember} element the
 * member's own document is requested by substituting its PID into that URL.
 * If that request succeeds, the matching "Book/view" URL is probed; when the
 * probe fails, the member's {@code rdf:resource} attribute is set to "#del".
 * A member whose own document cannot be read is left untouched.
 *
 * @param metadataPrefix not used by this method
 * @param baseURL the provider URL, starting with "phaidra:///"
 * @param from not used by this method
 * @param until not used by this method
 * @return {@code true} if the collection document was read; {@code false}
 *         if an error occurred (the error is written to the run log)
 */
public boolean collectfromPhaidra(String metadataPrefix, String baseURL, String from, String until) {
    try {
        final String collectionUrl = baseURL.substring(11);
        log.debug("REST request to " + collectionUrl);
        URLConnection con = new URL(collectionUrl).openConnection();
        con.setUseCaches(false);
        metadata = parser.build(con.getInputStream());

        XPath xpath = XPath.newInstance("//rel:hasCollectionMember");
        xpath.addNamespace(Common.xmlns_rel);
        List nodes = (List) xpath.selectNodes(metadata);

        for (Iterator iter = nodes.iterator(); iter.hasNext();) {
            Element em = (Element) iter.next();
            // Drop the first 12 characters of rdf:resource to obtain the PID.
            String pid = em.getAttributeValue("resource", Common.xmlns_rdf).substring(12);
            String memberUrl;
            try {
                memberUrl = collectionUrl.replaceAll("o:[0-9]*", pid);
                URLConnection member = new URL(memberUrl).openConnection();
                member.setUseCaches(false);
                // Parsed only to verify that the member can be read.
                parser.build(member.getInputStream());
            } catch (Exception q) {
                // Best effort: an unreadable member is kept as it is.
                log.warn("Could not read collection member " + pid, q);
                continue;
            }

            InputStream viewStream = null;
            try {
                URLConnection view = new URL(memberUrl.replaceAll("Collection/get", "Book/view")).openConnection();
                view.setUseCaches(false);
                viewStream = view.getInputStream();
            } catch (Exception p) {
                // No viewable book behind this member: mark it for exclusion.
                em.setAttribute("resource", "#del", Common.xmlns_rdf);
            } finally {
                if (viewStream != null) {
                    try {
                        viewStream.close();
                    } catch (IOException closeEx) {
                        log.debug(closeEx.getLocalizedMessage(), closeEx);
                    }
                }
            }
        }
        return true;
    } catch (Exception e) {
        log.error(e.getLocalizedMessage(), e);
        try {
            logger.write("\n" + new java.util.Date() + e.getLocalizedMessage());
        } catch (Exception q) {
            // The run log is best effort; the failure is already in the application log.
        }
        return false;
    }
}
/**
 * Reads the metadata document of a "file:///" provider from the local file
 * system into {@code metadata}.
 *
 * @param metadataPrefix not used by this method
 * @param baseURL the provider URL; the path is everything after the first
 *        eight characters
 * @param from not used by this method
 * @param until not used by this method
 * @return {@code true} if the file was parsed, {@code false} otherwise (the
 *         error message is appended to the run log on a best-effort basis)
 */
public boolean collect(String metadataPrefix, String baseURL, String from, String until) {
    boolean parsed;
    try {
        final File localFile = new File(baseURL.substring(8));
        metadata = parser.build(localFile);
        parsed = true;
    } catch (Exception e) {
        parsed = false;
        try {
            logger.write("\n" + new java.util.Date() + e.getLocalizedMessage() );
        } catch (Exception q) {}
    }
    return parsed;
}
/**
 * Harvests all records of a provider through {@code ListRecords} and
 * collects them below a fresh "OAI-PMH" root element in {@code metadata}.
 *
 * Requests are repeated for as long as the response carries a non-empty
 * resumption token. From every response only the {@code oai:record}
 * elements matching the optional XPath predicate {@code constraints} are
 * copied.
 *
 * @param metadataPrefix the metadata prefix to request
 * @param baseURL the base URL of the provider
 * @param constraints XPath predicate (without brackets) restricting the
 *        records; {@code null} or empty selects all records
 * @param from passed through to {@code ListRecords}
 * @param until passed through to {@code ListRecords}
 * @return {@code true} if all passes succeeded; {@code false} if the
 *         provider reported errors or an exception occurred
 */
public boolean harvest(String metadataPrefix, String baseURL, String constraints, String from, String until) {
    try {
        metadata = new Document();
        root = new Element("OAI-PMH");
        metadata.addContent(root);
        ListRecords listRecords = new ListRecords(baseURL, from, until, null, metadataPrefix);
        final String predicate = (constraints == null || constraints.isEmpty()) ? "" : "[" + constraints + "]";
        int passes = 0;
        log.debug("REST request to "+baseURL+" with metadataPrefix "+metadataPrefix+" "+constraints);
        while (true) {
            NodeList errors = listRecords.getErrors();
            if (errors != null && errors.getLength() > 0) {
                // Report every error of the response, then give up on this provider.
                for (int j = 0; j < errors.getLength(); j++) {
                    Node item = errors.item(j);
                    logger.write("\n"+ new java.util.Date() +" "+item.getTextContent());
                }
                return false;
            }
            // NOTE(review): the getBytes()/"UTF-8" round trip is a no-op only when the
            // platform charset is UTF-8 - confirm whether it is still needed.
            Document page = parser.build(new StringReader(new String(listRecords.toString().getBytes(),"UTF-8")));
            XPath xpath = XPath.newInstance("//oai:record" + predicate);
            xpath = addNamespaces(xpath);
            List records = (List) xpath.selectNodes(page);
            for (Iterator iter = records.iterator(); iter.hasNext();) {
                Element em = (Element) iter.next();
                // Clone, because the element still belongs to the response document.
                root.addContent((Element) em.clone());
            }
            String resumptionToken = listRecords.getResumptionToken();
            if (resumptionToken == null || resumptionToken.isEmpty()) {
                break;
            }
            listRecords = new ListRecords(baseURL, resumptionToken);
            passes++;
            log.debug("Pass "+passes+" on "+baseURL+ " with resumptionToken "+resumptionToken);
        }
        log.debug("Building JDOM Document from harvested metadata was successful");
        return true;
    } catch (Exception e) {
        log.error(e.getLocalizedMessage(), e);
        final String message = e.getLocalizedMessage();
        // "bad syntax" failures are kept out of the run log; everything else,
        // including exceptions without a message, is reported there.
        if (message == null || !message.contains("bad syntax")) {
            try {
                logger.write("\n" + new java.util.Date() + (message != null ? message : e.toString()));
            } catch (Exception q) {
                log.debug(q.getLocalizedMessage(), q);
            }
        }
    }
    return false;
}
/**
 * Ingests the records currently held in {@code metadata} into the
 * repository, creating or updating one object per record.
 *
 * For every record the PID is derived, the object is cloned from the
 * content model if it does not exist yet, and the datastreams URL, RECORD,
 * EDM_STREAM, DC and THUMBNAIL are updated. Failures while processing a
 * single record are logged and do not stop the remaining records.
 *
 * @param par one row of the repositories table: [0] name, [1] updated,
 *        [2] service provider URL, [3] metadata prefix, [4] XPath to the
 *        object URL, [5] content model, [6] constraints, [7] thumbnail
 *        XPath or URL, [8] owner
 * @return {@code false} if the stylesheet RECORDtoEDM could not be read or
 *         the user cancelled the progress dialog; {@code true} otherwise,
 *         including when an unexpected exception was logged
 * @exception Exception declared for callers; exceptions raised inside are
 *            logged, not propagated
 */
public boolean addItems(String[] par) throws Exception {
    try {
        String name = par[0];
        String serviceprovider = par[2];
        String url = par[4];
        String model = par[5];
        String icon = par[7];
        String owner = par[8];
        String phaidra = null;
        XPath xpath = null;
        if (serviceprovider.contains("phaidra:///")) {
            xpath = XPath.newInstance("//rel:hasCollectionMember[@rdf:resource != '#del']");
            serviceprovider = serviceprovider.substring(11);
            int pidStart = serviceprovider.indexOf("/o:");
            phaidra = serviceprovider.substring(0, pidStart);
        } else {
            xpath = XPath.newInstance("//oai:record");
        }
        xpath = addNamespaces(xpath);

        // Prefer the owner's stylesheet, fall back to the one of cirilo:Backbone.
        byte[] stylesheet = null;
        try {
            stylesheet = Repository.getDatastream("cirilo:"+owner, "RECORDtoEDM" , "");
        } catch (Exception ex) {
            try {
                stylesheet = Repository.getDatastream("cirilo:Backbone", "RECORDtoEDM" , "");
            } catch (Exception q) {
                Common.log(logger, q);
                return false;
            }
        }

        System.setProperty("javax.xml.transform.TransformerFactory", "net.sf.saxon.TransformerFactoryImpl");
        // Hand the raw bytes to the XML parser so that it can honour the encoding
        // declared by the stylesheet instead of the platform charset.
        Transformer transformer = TransformerFactory.newInstance().newTransformer(new StreamSource(new ByteArrayInputStream(stylesheet)));
        URLConnection con = new URL (new String(Repository.getDatastream(model , "DC_MAPPING" , ""))).openConnection();
        con.setUseCaches(false);
        Document mapping = parser.build(con.getInputStream());
        // The mapping does not change per record, so serialise it once.
        final String mappingXml = outputter.outputString(mapping);
        log.debug("Reading stylesheet RECORDtoEDM was successful");

        List records = (List) xpath.selectNodes(metadata);
        if (records.size() > 0) {
            ProgressDialog progressDialog = new ProgressDialog( getCoreDialog(), Common.WINDOW_HEADER);
            progressDialog.displayPercentageInProgressBar = true;
            progressDialog.millisToDecideToPopup = 1;
            progressDialog.millisToPopup = 1;
            progressDialog.beginTask(name+": "+res.getString("harvcont"),records.size(), true);
            progressDialog.worked(1);

            int count = 0;
            for (Iterator iter = records.iterator(); iter.hasNext();) {
                Element em = (Element) iter.next();
                count++;
                if (progressDialog.isCanceled()) {
                    return false;
                }
                progressDialog.worked(1);
                try {
                    Thread.sleep(50);
                } catch (InterruptedException e) {
                    // Keep the interrupt visible to whoever owns this thread.
                    Thread.currentThread().interrupt();
                }

                try {
                    Object object = null;
                    String iconref = null;
                    String uwmetadata = null;
                    String pid = "o:oai.";
                    String oid = null;
                    if (phaidra != null) {
                        oid = em.getAttributeValue("resource",Common.xmlns_rdf).replaceAll("info:fedora/", "");
                        pid += owner+"."+oid.replaceAll("o:", "");
                    } else {
                        pid += em.getChild("header", Common.xmlns_oai).getChild("identifier", Common.xmlns_oai).getText()
                                .replaceAll("info:fedora/oai:", "")
                                .replaceAll("o:", "")
                                .replaceAll("hdl:", "");
                    }
                    pid = Common.normalize(pid).replaceAll("oai\\.oai", "oai");
                    log.debug("Starting ingest of object "+count+" with PID "+pid);

                    if (!Repository.exist(pid)) {
                        pid = temps.cloneTemplate("info:fedora/"+model, owner, "$" + pid, (String) null);
                        log.debug("Creating the object "+pid+ " was successful" );
                        logger.write("\n" + new java.util.Date() + res.getString("creatingobject") + pid);
                    } else {
                        logger.write("\n" + new java.util.Date() + res.getString("updatingobject") + pid);
                        log.debug("Updating the object "+pid+ " was successful" );
                    }

                    Object path = null;
                    if (phaidra != null) {
                        iconref = icon.replaceAll("[$]self", oid);
                        log.debug(iconref);
                        uwmetadata = phaidra+"/"+oid+"/methods/bdef:Asset/getUWMETADATA";
                        log.debug(uwmetadata);
                    } else {
                        XPath urlPath = addNamespaces(XPath.newInstance(url));
                        path = urlPath.selectSingleNode(em);
                    }

                    // Records without an object reference are skipped.
                    if (path == null && phaidra == null) {
                        continue;
                    }

                    if (phaidra == null) {
                        // A leading '$' marks a fixed thumbnail reference, anything else
                        // is an XPath evaluated against the record.
                        if (icon.startsWith("$")) {
                            iconref = icon.substring(1);
                        } else {
                            XPath vpath = addNamespaces(XPath.newInstance(icon));
                            object = vpath.selectSingleNode(em);
                        }
                    }

                    // Records without any thumbnail reference are skipped as well.
                    if (object == null && iconref == null) {
                        continue;
                    }
                    if (iconref == null) {
                        iconref = textOfNode(object);
                    }

                    try {
                        String buf = null;
                        Document uwm = null;
                        String objref = null;
                        if (uwmetadata != null) {
                            con = new URL(uwmetadata).openConnection();
                            con.setUseCaches(false);
                            uwm = parser.build(con.getInputStream());
                            Element collection = new Element("collection").setText(serviceprovider);
                            uwm.getRootElement().addContent(collection);
                            buf = outputter.outputString(uwm);
                            objref = uwmetadata.replaceAll("getUWMETADATA", "view");
                        } else {
                            buf = outputter.outputString(em);
                        }
                        String pathText = textOfNode(path);
                        if (pathText != null) {
                            objref = pathText;
                        }
                        log.debug(objref);
                        Repository.modifyDatastream(pid, "URL", null, "R", objref);

                        JDOMSource in = new JDOMSource(phaidra == null ? em : uwm.getRootElement() );
                        JDOMResult out = new JDOMResult();
                        try {
                            transformer.transform(in, out);
                            log.debug("Mapping metadata of object "+pid+ " was successful" );
                        } catch (Exception e) {
                            log.error(e.getLocalizedMessage(),e);
                        }
                        try {
                            Repository.modifyDatastreamByValue(pid, "RECORD", "text/xml", buf);
                            edm.set(out.getDocument());
                            Repository.modifyDatastreamByValue(pid, "EDM_STREAM", "text/xml", edm.toString());
                            log.debug("Updating metadata of object "+pid+ " was successful" );
                        } catch (Exception e) {
                            log.error(e.getLocalizedMessage(),e);
                        }

                        MDMapper m = new MDMapper(pid, mappingXml);
                        org.jdom.Document dc = parser.build( new StringReader (m.transform(parser.build(new StringReader(edm.toString())))));
                        dc = Common.validate(dc);
                        Repository.modifyDatastreamByValue(pid, "DC", "text/xml", outputter.outputString(dc));
                    } catch (Exception eq) {
                        log.error(eq.getLocalizedMessage(),eq);
                    }

                    // The thumbnail is attempted even when the metadata update failed.
                    storeThumbnail(pid, iconref, phaidra != null);
                } catch (Exception e) {
                    log.error(e.getLocalizedMessage(),e);
                }
            }

            MessageFormat msgFmt = new MessageFormat(res.getString("oaiok"));
            Object[] args = {count, records.size(), serviceprovider};
            JOptionPane.showMessageDialog (getCoreDialog(),msgFmt.format(args));
        }
    } catch (Exception ex) {
        log.error(ex.getLocalizedMessage(),ex);
    } finally {
        System.setProperty("javax.xml.transform.TransformerFactory", "org.apache.xalan.processor.TransformerFactoryImpl");
    }
    return true;
}

/** Returns the text of a JDOM element or the value of a JDOM attribute, {@code null} for anything else. */
private static String textOfNode(Object node) {
    if (node instanceof Element) {
        return ((Element) node).getText();
    }
    if (node instanceof Attribute) {
        return ((Attribute) node).getValue();
    }
    return null;
}

/**
 * Downloads the image at {@code iconref}, scales it down and stores it as
 * the THUMBNAIL datastream of {@code pid}; failures are logged and the
 * temporary files are always removed.
 */
private static void storeThumbnail(String pid, String iconref, boolean flattenToRgb) {
    File image = null;
    File thumbnail = null;
    try {
        image = File.createTempFile("temp",".tmp");
        thumbnail = File.createTempFile( "temp", ".tmp" );
        downloadToFile(new URL(iconref), image);
        if (flattenToRgb) {
            // Redraw on a white RGB canvas before re-encoding the image as JPEG.
            BufferedImage bufferedImage = ImageIO.read(image);
            BufferedImage im = new BufferedImage(bufferedImage.getWidth(), bufferedImage.getHeight(), BufferedImage.TYPE_INT_RGB);
            im.createGraphics().drawImage(bufferedImage, 0, 0, Color.WHITE, null);
            ImageIO.write(im, "jpg", image);
        }
        ImageTools.createThumbnail( image, thumbnail, 300, 240, Color.lightGray );
        Repository.modifyDatastream(pid, "THUMBNAIL","image/jpeg", "M", thumbnail);
        log.debug("Updating thumbnail of object "+pid+ " was successful" );
    } catch (Exception eq) {
        log.error(eq.getLocalizedMessage(),eq);
    } finally {
        if (thumbnail != null) {
            thumbnail.delete();
        }
        if (image != null) {
            image.delete();
        }
    }
}

/** Copies the content behind {@code source} into {@code target}, closing both streams in all cases. */
private static void downloadToFile(URL source, File target) throws IOException {
    InputStream is = source.openStream();
    try {
        OutputStream os = new FileOutputStream(target.getAbsoluteFile());
        try {
            byte[] b = new byte[2048];
            int length;
            while ((length = is.read(b)) != -1) {
                os.write(b, 0, length);
            }
        } finally {
            os.close();
        }
    } finally {
        is.close();
    }
}
/**
 * Handler of the "show log file" button: opens the log file of the most
 * recent harvesting run in the text editor dialog.
 *
 * @param e the triggering action event (not used)
 * @exception Exception if the editor service cannot be obtained or opened
 */
public void handleShowLogfileButton(ActionEvent e)
        throws Exception {
    final TextEditor editor = (TextEditor) CServiceProvider.getService(DialogNames.TEXTEDITOR);
    editor.set(logfile, null, "text/log", "R", null, null, null);
    editor.open();
}
/**
 * Fills the repositories table when the dialog is shown.
 *
 * The DATAPROVIDERS datastream of cirilo:Backbone is loaded, the log
 * directory is taken from the "logdir" attribute of its root element, and
 * every repository with state "active" becomes one table row. Finally the
 * stored dialog layout is restored. Failures are logged and leave the
 * dialog usable; an incomplete repository entry is skipped.
 *
 * @exception CShowFailedException declared by the dialog contract; not
 *            thrown by this implementation
 */
public void show() throws CShowFailedException {
    try {
        String[] names ={res.getString("provider"),res.getString("updated"),res.getString("baseurl"),res.getString("prefix"),res.getString("shownat"), res.getString("cmodel"),"Constraints","Thumbnail", res.getString("owner")};
        se = (Session) CServiceProvider.getService( ServiceNames.SESSIONCLASS );
        JTable tb = (JTable) getGuiComposite().getWidget("jtRepositories");

        doc = parser.build(user.getUrl()+"/objects/cirilo%3ABackbone/datastreams/DATAPROVIDERS/content");
        logdir = doc.getRootElement().getAttributeValue("logdir");
        XPath xPath = XPath.newInstance( "/dataproviders/repository[@state='active']" );
        List repositories = (List) xPath.selectNodes( doc );

        HarvesterTableModel dm = new HarvesterTableModel(names);
        if (repositories != null) {
            for (Iterator iter = repositories.iterator(); iter.hasNext();) {
                try {
                    Element e = (Element) iter.next();
                    String[] row = new String[9];
                    row[0] = e.getAttributeValue("name");
                    row[1] = e.getChild("updated").getText();
                    row[2] = e.getChild("serviceprovider").getText();
                    row[3] = e.getChild("metadataprefix").getText();
                    row[4] = e.getChild("url").getText();
                    row[5] = e.getChild("model").getText();
                    row[6] = e.getChild("constraints").getText();
                    row[7] = e.getChild("thumbnail").getText();
                    row[8] = e.getChild("owner").getText();
                    dm.add(row);
                } catch (Exception ex) {
                    // A repository entry lacking one of the child elements is left out.
                    log.warn("Skipping incomplete data provider entry", ex);
                }
            }
        }
        tb.setModel(dm);
        // Selecting row 0 of an empty table would throw and skip the layout restore.
        if (tb.getRowCount() > 0) {
            tb.setRowSelectionInterval(0,0);
        }
        org.emile.cirilo.dialog.CBoundSerializer.load(this.getCoreDialog(), se.getHarvesterDialogProperties(), tb);
    } catch (Exception e) {
        log.error(e.getLocalizedMessage(), e);
    }
}
/**
* Clean-up hook of the dialog. Intentionally empty: this method performs no
* work.
*/
protected void cleaningUp() {
}
/**
 * Hook called when the dialog is about to close.
 *
 * @return always {@code true}
 */
protected boolean closing() {
    // Nothing has to be checked or released before closing.
    return true;
}
/**
 * Initialises the dialog after it has been opened: creates the XML parser
 * and outputter, looks up the required services, passes the configured
 * login name to the GeoNames web service and wires the button handlers.
 * The "show log file" button starts out disabled.
 *
 * @exception COpenFailedException wrapping any exception raised during
 *            initialisation
 */
protected void opened() throws COpenFailedException {
    try {
        parser = new SAXBuilder();

        user = (User) CServiceProvider.getService(ServiceNames.CURRENT_USER);
        temps = (TemplateSubsystem) CServiceProvider.getService(ServiceNames.TEMPLATESUBSYSTEM);
        res = (ResourceBundle) CServiceProvider.getService(ServiceNames.RESOURCES);
        props = (CPropertyService) CServiceProvider.getService(ServiceNames.PROPERTIES);

        final String geonamesUser = props.getProperty("user","TEI.LoginName");
        WebService.setUserName(geonamesUser);

        // Raw (unformatted) output, encoded as UTF-8.
        outputter = new XMLOutputter(Format.getRawFormat().setEncoding("UTF-8"));

        // Widget name -> name of the handler method in this class.
        final String[][] buttonHandlers = {
            {"jbClose", "handleCloseButton"},
            {"jbStart", "handleStartButton"},
            {"jbShowLogfile", "handleShowLogfileButton"},
        };
        for (String[] binding : buttonHandlers) {
            CDialogTools.createButtonListener(this, binding[0], binding[1]);
        }

        getGuiComposite().getWidget("jbShowLogfile").setEnabled(false);
    } catch (Exception ex) {
        throw new COpenFailedException(ex);
    }
}
/**
 * Registers the namespace declarations from {@code Common} that the
 * harvester's XPath expressions may refer to.
 *
 * @param xpath the expression to extend
 * @return the same {@code xpath} instance, for convenient reassignment
 */
private XPath addNamespaces(XPath xpath) {
    final XPath target = xpath;

    // Metadata vocabularies
    target.addNamespace(Common.xmlns_dc);
    target.addNamespace(Common.xmlns_oai);
    target.addNamespace(Common.xmlns_edm);
    target.addNamespace(Common.xmlns_europeana);
    target.addNamespace(Common.xmlns_tei_p5);
    target.addNamespace(Common.xmlns_dcterms);
    target.addNamespace(Common.xmlns_lido);

    // RDF and related vocabularies
    target.addNamespace(Common.xmlns_skos);
    target.addNamespace(Common.xmlns_rdf);
    target.addNamespace(Common.xmlns_ore);
    target.addNamespace(Common.xmlns_owl);
    target.addNamespace(Common.xmlns_rdaGr2);
    target.addNamespace(Common.xmlns_wgs84_pos);

    // Container formats and relations
    target.addNamespace(Common.xmlns_mets);
    target.addNamespace(Common.xmlns_mods);
    target.addNamespace(Common.xmlns_xlink);
    target.addNamespace(Common.xmlns_rel);

    return target;
}
// Localised UI and log strings; looked up in opened().
private ResourceBundle res;
// Template subsystem used by addItems() to clone new objects from a content model.
private TemplateSubsystem temps;
// Application properties; read in opened() for the "user"/"TEI.LoginName" entry.
private CPropertyService props;
// DATAPROVIDERS document: loaded in show(), written back by handleStartButton().
private Document doc;
// Records of the provider currently processed; filled by harvest(), collect()
// or collectfromPhaidra() and consumed by addItems().
private Document metadata;
// Root element ("OAI-PMH") of the document built by harvest().
private Element root;
// Shared JDOM parser, created in opened().
private SAXBuilder parser;
// Shared JDOM outputter (raw format, UTF-8), created in opened().
private XMLOutputter outputter;
// Session service; provides the stored dialog properties.
private Session se;
// Current user, looked up in opened().
private User user;
// Path of the log file of the most recent harvesting run.
private String logfile;
// Log directory, taken from the "logdir" attribute of the DATAPROVIDERS root in show().
private String logdir;
// Writer for the log file of the running harvest; assigned in handleStartButton().
private FileWriter logger;
// EDM helper created for each harvesting run in handleStartButton().
private EDM edm;
}