/* * Copyright (c) 2012 Data Harmonisation Panel * * All rights reserved. This program and the accompanying materials are made * available under the terms of the GNU Lesser General Public License as * published by the Free Software Foundation, either version 3 of the License, * or (at your option) any later version. * * You should have received a copy of the GNU Lesser General Public License * along with this distribution. If not, see <http://www.gnu.org/licenses/>. * * Contributors: * HUMBOLDT EU Integrated Project #030962 * Data Harmonisation Panel <http://www.dhpanel.eu> */ package eu.esdihumboldt.hale.common.instance.orient.storage; import java.text.MessageFormat; import java.util.Date; import java.util.List; import javax.xml.namespace.QName; import org.eclipse.core.runtime.IProgressMonitor; import org.eclipse.core.runtime.IStatus; import org.eclipse.core.runtime.Status; import org.eclipse.core.runtime.jobs.Job; import com.orientechnologies.orient.core.db.ODatabaseRecordThreadLocal; import com.orientechnologies.orient.core.db.document.ODatabaseDocumentTx; import com.orientechnologies.orient.core.intent.OIntentMassiveInsert; import com.orientechnologies.orient.core.record.impl.ODocument; import de.fhg.igd.slf4jplus.ALogger; import de.fhg.igd.slf4jplus.ALoggerFactory; import de.fhg.igd.slf4jplus.ATransaction; import eu.esdihumboldt.hale.common.core.report.Message; import eu.esdihumboldt.hale.common.core.report.ReportHandler; import eu.esdihumboldt.hale.common.core.report.Reporter; import eu.esdihumboldt.hale.common.core.report.impl.DefaultReporter; import eu.esdihumboldt.hale.common.core.report.impl.MessageImpl; import eu.esdihumboldt.hale.common.core.service.ServiceProvider; import eu.esdihumboldt.hale.common.instance.model.DataSet; import eu.esdihumboldt.hale.common.instance.model.Instance; import eu.esdihumboldt.hale.common.instance.model.InstanceCollection; import eu.esdihumboldt.hale.common.instance.model.MutableInstance; import eu.esdihumboldt.hale.common.instance.model.ResolvableInstanceReference; import eu.esdihumboldt.hale.common.instance.model.ResourceIterator; import eu.esdihumboldt.hale.common.instance.orient.OInstance; import eu.esdihumboldt.hale.common.instance.processing.InstanceProcessingExtension; import eu.esdihumboldt.hale.common.instance.processing.InstanceProcessor; import eu.esdihumboldt.hale.common.schema.model.TypeDefinition; import gnu.trove.TObjectIntHashMap; import gnu.trove.TObjectIntProcedure; /** * Store instances in a database * * @author Simon Templer */ public abstract class StoreInstancesJob extends Job { private static final ALogger log = ALoggerFactory.getLogger(StoreInstancesJob.class); private InstanceCollection instances; private final LocalOrientDB database; /** * The job report, may be <code>null</code>. */ protected final Reporter<Message> report; private final ReportHandler reportHandler; private final ServiceProvider serviceProvider; /** * Create a job that stores instances in a database * * @param name the (human readable) job name * @param instances the instances to store in the database * @param database the database * @param serviceProvider the service provider * @param reportHandler the report handler, <code>null</code> if no report * should be generated */ public StoreInstancesJob(String name, LocalOrientDB database, InstanceCollection instances, final ServiceProvider serviceProvider, final ReportHandler reportHandler) { super(name); setUser(true); this.database = database; this.instances = instances; this.serviceProvider = serviceProvider; this.reportHandler = reportHandler; if (reportHandler != null) { report = new DefaultReporter<Message>("Load data into database", Message.class, false); } else { report = null; } } /** * @see Job#run(IProgressMonitor) */ @Override public IStatus run(IProgressMonitor monitor) { boolean exactProgress = instances.hasSize(); monitor.beginTask("Store instances in database", (exactProgress) ? (instances.size()) : (IProgressMonitor.UNKNOWN)); int count = 0; TObjectIntHashMap<QName> typeCount = new TObjectIntHashMap<>(); if (report != null) { // set the correct start time report.setStartTime(new Date()); } // get database connection DatabaseReference<ODatabaseDocumentTx> ref = database.openWrite(); ODatabaseDocumentTx db = ref.getDatabase(); ATransaction trans = log.begin("Store instances in database"); try { // use intent db.declareIntent(new OIntentMassiveInsert()); // Find all the InstanceProcessors to feed them the stored Instances final InstanceProcessingExtension ext = new InstanceProcessingExtension( serviceProvider); final List<InstanceProcessor> processors = ext.getInstanceProcessors(); BrowseOrientInstanceCollection browser = new BrowseOrientInstanceCollection(database, null, DataSet.SOURCE); // TODO decouple next() and save()? long lastUpdate = 0; // last count update ResourceIterator<Instance> it = instances.iterator(); int size = instances.size(); try { while (it.hasNext() && !monitor.isCanceled()) { Instance instance = it.next(); // further processing before storing processInstance(instance); // get/create OInstance OInstance conv = ((instance instanceof OInstance) ? ((OInstance) instance) : (new OInstance(instance))); conv.setInserted(true); // update the instance to store, e.g. generating metadata updateInstance(conv); ODatabaseRecordThreadLocal.INSTANCE.set(db); // configure the document ODocument doc = conv.configureDocument(db); // and save it doc.save(); // Create an InstanceReference for the saved instance and // feed it to all known InstanceProcessors. The decoration // with ResolvableInstanceReference allows the // InstanceProcessors to resolve the instances if required. ResolvableInstanceReference resolvableRef = new ResolvableInstanceReference( new OrientInstanceReference(doc.getIdentity(), conv.getDataSet(), conv.getDefinition()), browser); processors.forEach(p -> p.process(instance, resolvableRef)); count++; TypeDefinition type = instance.getDefinition(); if (type != null) { typeCount.adjustOrPutValue(type.getName(), 1, 1); } if (exactProgress) { monitor.worked(1); } long now = System.currentTimeMillis(); if (now - lastUpdate > 100) { // only update every 100 // milliseconds monitor.subTask(MessageFormat.format("{0}{1} instances processed", String.valueOf(count), size != InstanceCollection.UNKNOWN_SIZE ? "/" + String.valueOf(size) : "")); lastUpdate = now; } } } finally { it.close(); } db.declareIntent(null); } catch (RuntimeException e) { if (report != null) { reportTypeCount(report, typeCount); report.error(new MessageImpl("Error storing instances in database", e)); report.setSuccess(false); reportHandler.publishReport(report); } throw e; } finally { ref.dispose(); trans.end(); /* * Reset instances to prevent memory leak. It seems Eclipse * internally holds a reference to the job (in JobInfo and/or * ProgressMonitorFocusJobDialog) and this results in the instance * collection not being garbage collected. This is especially bad, * if an in-memory instance collection is used, e.g. a * DefaultInstanceCollection that is used when loading a Shapefile. */ instances = null; } try { onComplete(); } catch (RuntimeException e) { String message = "Error while post processing stored instances"; if (report != null) { report.error(new MessageImpl(message, e)); } else { log.error(message, e); } } String message = MessageFormat.format("Stored {0} instances in the database.", count); if (monitor.isCanceled()) { String warn = "Loading instances was canceled, incomplete data set in the database."; if (report != null) { report.warn(new MessageImpl(warn, null)); } else { log.warn(warn); } } if (report != null) { reportTypeCount(report, typeCount); report.setSuccess(true); report.setSummary(message); reportHandler.publishReport(report); } else { log.info(message); } monitor.done(); return new Status((monitor.isCanceled()) ? (IStatus.CANCEL) : (IStatus.OK), "eu.esdihumboldt.hale.common.instance.orient", message); } private void reportTypeCount(Reporter<Message> report, TObjectIntHashMap<QName> typeCount) { typeCount.forEachEntry(new TObjectIntProcedure<QName>() { @Override public boolean execute(QName typeName, int count) { StringBuilder msg = new StringBuilder("Stored "); msg.append(count); msg.append(" instances of type "); msg.append(typeName.getLocalPart()); String ns = typeName.getNamespaceURI(); if (ns != null && !ns.isEmpty()) { msg.append(" ("); msg.append(ns); msg.append(")"); } report.info(new MessageImpl(msg.toString(), null)); return true; } }); } /** * Update an instance before it is converted and saved, e.g. adding * metadata. The default implementation does nothing and may be overridden. * * @param instance the instance */ protected void updateInstance(MutableInstance instance) { // override me } /** * Process an instance before it is saved. The default implementation does * nothing and may be overridden. * * @param instance the instance, may not be changed in any way */ protected void processInstance(Instance instance) { // override me } /** * Called when the job has been completed */ protected abstract void onComplete(); }