/* See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * Esri Inc. licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.esri.gpt.control.webharvest.engine;

import com.esri.gpt.catalog.arcims.ImsServiceException;
import com.esri.gpt.catalog.context.CatalogIndexException;
import com.esri.gpt.catalog.harvest.history.HeRecord;
import com.esri.gpt.catalog.harvest.history.HeUpdateReportRequest;
import com.esri.gpt.catalog.harvest.history.HeUpdateRequest;
import com.esri.gpt.catalog.harvest.repository.HrRecord;
import com.esri.gpt.catalog.management.MmdEnums;
import com.esri.gpt.catalog.publication.CollectSourceUrisRequest;
import com.esri.gpt.catalog.publication.DeleteSourceUrisRequest;
import com.esri.gpt.catalog.publication.HarvesterRequest;
import com.esri.gpt.catalog.schema.SchemaException;
import com.esri.gpt.catalog.schema.ValidationException;
import com.esri.gpt.control.webharvest.protocol.ProtocolInvoker;
import com.esri.gpt.framework.context.ApplicationConfiguration;
import com.esri.gpt.framework.context.ApplicationContext;
import com.esri.gpt.framework.context.RequestContext;
import com.esri.gpt.framework.http.HttpClientException;
import com.esri.gpt.framework.jsf.MessageBroker;
import com.esri.gpt.framework.mail.MailRequest;
import com.esri.gpt.framework.resource.api.Native;
import com.esri.gpt.framework.resource.api.Publishable;
import com.esri.gpt.framework.resource.api.SourceUri;
import com.esri.gpt.framework.resource.query.Criteria;
import com.esri.gpt.framework.security.identity.IdentityAdapter;
import com.esri.gpt.framework.security.identity.IdentityConfiguration;
import com.esri.gpt.framework.security.identity.local.LocalDao;
import com.esri.gpt.framework.security.principal.*;
import com.esri.gpt.framework.util.Val;
import com.esri.gpt.framework.xml.XsltTemplate;
import com.esri.gpt.server.csw.client.NullReferenceException;
import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URL;
import java.sql.SQLException;
import java.util.*;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerException;
import org.xml.sax.SAXException;

/**
 * Data processor.
 */
class LocalDataProcessor implements DataProcessor {

  /** notification subject transformation path */
  private static final String NOTIF_SUBJECT_PATH = "gpt/harvest/notifSubject.xslt";
  /** notification message transformation path */
  private static final String NOTIF_MESSAGE_PATH = "gpt/harvest/notifMessage.xslt";
  /** logger */
  private static final Logger LOGGER = Logger.getLogger(LocalDataProcessor.class.getCanonicalName());
  /** message broker */
  private MessageBroker messageBroker;
  /** listener */
  private Harvester.Listener listener;
  /** base context path */
  private String baseContextPath = "";
  /** name */
  private String name = "Local";

  /**
   * Creates instance of the processor.
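   * <p>A construction sketch (illustrative only; the broker, context path, and
   * listener are assumed to be supplied by the harvester engine):
   * <pre>
   * LocalDataProcessor processor = new LocalDataProcessor(
   *     "Local", messageBroker, "/geoportal", harvesterListener);
   * </pre>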
   * @param name processor name
   * @param messageBroker message broker
   * @param baseContextPath base context path
   * @param listener listener
   */
  public LocalDataProcessor(String name, MessageBroker messageBroker, String baseContextPath, Harvester.Listener listener) {
    if (messageBroker == null) {
      throw new IllegalArgumentException("No message broker provided.");
    }
    this.baseContextPath = Val.chkStr(baseContextPath);
    if (listener == null) {
      throw new IllegalArgumentException("No listener provided.");
    }
    this.messageBroker = messageBroker;
    this.listener = listener;
    this.name = Val.chkStr(name, this.name);
  }

  @Override
  public String getName() {
    return name;
  }

  /**
   * Called upon the start of harvesting the resource.
   * @param unit execution unit
   */
  @Override
  public void onStart(ExecutionUnit unit) {
    LOGGER.info("[SYNCHRONIZER] Starting processing metadata records through: " + unit);
    Date startTime = new Date();
    // create request context
    RequestContext context = RequestContext.extract(null);
    Long maxRepRecords = context.getApplicationConfiguration().getHarvesterConfiguration().getMaxRepRecords();
    if (maxRepRecords < 0) {
      maxRepRecords = null;
    }
    Long maxRepErrors = context.getApplicationConfiguration().getHarvesterConfiguration().getMaxRepErrors();
    if (maxRepErrors < 0) {
      maxRepErrors = null;
    }
    ExecutionUnitHelper helper = new ExecutionUnitHelper(unit);
    try {
      // initialize report builder
      Criteria criteria = unit.getCriteria();
      ReportBuilder rp = new ReportBuilder(
          criteria != null ? criteria.getMaxRecords() : null,
          maxRepRecords, maxRepErrors);
      rp.setStartTime(startTime);
      helper.setReportBuilder(rp);
      // prepare the publisher
      LocalDao localDao = new LocalDao(context);
      String uDN = localDao.readDN(unit.getRepository().getOwnerId());
      Publisher publisher = new Publisher(context, uDN);
      unit.setPublisher(publisher);
      // get all existing URI's for the specific harvesting site
      if (unit.getCleanupFlag()) {
        SourceUriArray sourceUris = new SourceUriArray(new String[]{"uri", "uuid"});
        helper.setSourceUris(sourceUris);
        collectExistingSourceURIs(context, unit.getRepository(), sourceUris);
      }
      // notify listeners
      listener.onHarvestStart(unit.getRepository());
    } catch (Exception ex) {
      LOGGER.log(Level.SEVERE, "[SYNCHRONIZER] Error starting metadata processing.", ex);
    } finally {
      context.onExecutionPhaseCompleted();
    }
  }

  /**
   * Called upon the end of harvesting of the resource.
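   * <p>Finalizes the harvest report, deletes records no longer present at the
   * source (when cleanup is enabled), saves the harvest history event and
   * report, sends the e-mail notification, and notifies listeners.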
   * @param unit execution unit
   * @param success flag indicating whether harvesting completed successfully
   */
  @Override
  public void onEnd(final ExecutionUnit unit, boolean success) {
    final ExecutionUnitHelper helper = new ExecutionUnitHelper(unit);
    RequestContext context = RequestContext.extract(null);
    Date endTime = new Date();
    // get report builder
    ReportBuilder rp = helper.getReportBuilder();
    try {
      rp.setEndTime(endTime);
      // cleanup
      long deletedCount = performCleanup(context, unit, helper);
      rp.setDeletedCount(deletedCount);
      // prepare event record
      HeRecord event = new HeRecord(unit.getRepository());
      event.setHarvestedCount((int) rp.getHarvestedCount());
      event.setValidatedCount((int) rp.getValidatedCount());
      event.setPublishedCount((int) rp.getPublishedCount());
      event.setDeletedCount(deletedCount);
      // save report
      HeUpdateRequest updateReq = new HeUpdateRequest(context, event);
      updateReq.execute();
      HeUpdateReportRequest updateReportReq = new HeUpdateReportRequest(context, event);
      ReportStream reportStream = rp.createReportStream();
      InputStream in = null;
      try {
        in = reportStream.getStream();
        updateReportReq.execute(in, reportStream.getLength());
      } finally {
        if (in != null) {
          try {
            in.close();
          } catch (IOException ex) {
          }
        }
      }
      sendNotification(context, event);
      // notify listeners
      listener.onHarvestEnd(unit.getRepository());
      LOGGER.info("[SYNCHRONIZER] Completed processing metadata records through: " + unit + ". Harvested: " + rp.getHarvestedCount() + ", validated: " + rp.getValidatedCount() + ", published: " + rp.getPublishedCount());
    } catch (Exception ex) {
      LOGGER.log(Level.SEVERE, "[SYNCHRONIZER] Error completing metadata records through: " + unit, ex);
    } finally {
      rp.cleanup();
      try {
        if (helper.getSourceUris() != null) {
          helper.getSourceUris().close();
        }
      } catch (IOException ex) {
      }
      context.onExecutionPhaseCompleted();
    }
  }

  /**
   * Performs cleanup.
   * @param context request context
   * @param unit execution unit
   * @param helper execution unit helper
   * @return number of records deleted during cleanup
   */
  private long performCleanup(RequestContext context, ExecutionUnit unit, final ExecutionUnitHelper helper) {
    // perform cleanup for the specific harvest repository
    final SourceUriArray sourceUris = helper.getSourceUris();
    if (unit.getCleanupFlag() && sourceUris != null) {
      // create Iterable based on MapEntryIterator
      Iterable<Map.Entry<String, String>> iterable = new Iterable<Map.Entry<String, String>>() {
        @Override
        public Iterator<Map.Entry<String, String>> iterator() {
          return new MapEntryIterator(sourceUris);
        }
      };
      if (unit.getPublisher() != null) {
        // delete all records identified by each sourceUri available in Iterable
        ApplicationConfiguration appCfg = ApplicationContext.getInstance().getConfiguration();
        boolean webharvesterCleanup = Val.chkBool(appCfg.getCatalogConfiguration().getParameters().getValue("webharvester.cleanup"), true);
        if (webharvesterCleanup) {
          LOGGER.info("[SYNCHRONIZER] Attempting to clean non-existent records from: " + unit);
          DeleteSourceUrisRequest deleteSourceUrisRequest = new DeleteSourceUrisRequest(context, unit.getPublisher(), iterable);
          try {
            deleteSourceUrisRequest.execute();
            LOGGER.info("[SYNCHRONIZER] Cleaned " + deleteSourceUrisRequest.getDeletedCount() + " records from: " + unit);
            return deleteSourceUrisRequest.getDeletedCount();
          } catch (Exception ex) {
            LOGGER.log(Level.SEVERE, "[SYNCHRONIZER] Error when attempting to clean non-existent records from: " + unit, ex);
          }
        }
      }
    }
    if (sourceUris != null) {
      try {
        sourceUris.close();
      } catch (IOException ex) {
      }
    }
    return 0;
  }

  /**
   * Called upon harvesting a single metadata record.
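   * <p>Removes the record from the cleanup set, applies the protocol update
   * flags and the 'from-date' criteria, publishes the record through a
   * {@link HarvesterRequest}, and records the outcome in the harvest report.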
   * @param unit execution unit
   * @param record record to publish
   * @throws IOException if reading metadata fails
   * @throws SQLException if accessing the database fails
   * @throws CatalogIndexException if operation on the index fails
   * @throws TransformerConfigurationException if processing metadata fails
   */
  @Override
  public void onMetadata(ExecutionUnit unit, Publishable record) throws IOException, SQLException, CatalogIndexException, TransformerConfigurationException {
    final ExecutionUnitHelper helper = new ExecutionUnitHelper(unit);
    // extract record information
    SourceUri sourceUri = record.getSourceUri();
    RequestContext context = RequestContext.extract(null);
    // get report builder
    ReportBuilder rp = helper.getReportBuilder();
    try {
      // immediately remove from the source URI's collection
      if (unit.getCleanupFlag()) {
        helper.getSourceUris().remove("uri", sourceUri.asString());
      }
      boolean proceed = (ProtocolInvoker.getUpdateDefinition(unit.getRepository().getProtocol()) && record instanceof Native) || (ProtocolInvoker.getUpdateContent(unit.getRepository().getProtocol()) && !(record instanceof Native));
      if (proceed) {
        // if a 'from-date' criteria is present, publish a non-native record only when its
        // last update date is unknown or not earlier than the 'from-date'
        if (!(record instanceof Native) && unit.getCriteria().getFromDate() != null) {
          Date lastUpdateDate = record.getUpdateDate();
          if (lastUpdateDate != null && lastUpdateDate.before(unit.getCriteria().getFromDate())) {
            // stop harvesting it
            proceed = false;
          }
        }
      }
      if (proceed) {
        String metadata = "";
        try {
          // notify listeners
          metadata = record.getContent();
          listener.onHarvestMetadata(unit.getRepository(), sourceUri, metadata);
          // publication request
          HarvesterRequest publicationRequest = new HarvesterRequest(context, unit.getPublisher(), unit.getRepository().getUuid(), sourceUri.asString(), metadata);
          publicationRequest.getPublicationRecord().setAutoApprove(ProtocolInvoker.getAutoApprove(unit.getRepository().getProtocol()));
          // if this is a repository descriptor, update repository record
          if (record instanceof Native && isRepositorySourceUri(sourceUri, unit.getRepository())) {
            String sMethod = MmdEnums.PublicationMethod.registration.toString();
            publicationRequest.getPublicationRecord().setUuid(unit.getRepository().getUuid());
            publicationRequest.getPublicationRecord().setPublicationMethod(sMethod);
            publicationRequest.getPublicationRecord().setAlternativeTitle(unit.getRepository().getName());
            publicationRequest.getPublicationRecord().setLockTitle(ProtocolInvoker.getLockTitle(unit.getRepository().getProtocol()));
          }
          publicationRequest.publish();
          boolean bReplaced = publicationRequest.getPublicationRecord().getWasDocumentReplaced();
          LOGGER.finer("[SYNCHRONIZER] SUCCEEDED processing metadata #" + (rp.getHarvestedCount() + 1) + " through: " + unit + ", source URI: " + sourceUri);
          // notify listeners
          listener.onPublishMetadata(unit.getRepository(), sourceUri, publicationRequest.getPublicationRecord().getUuid(), metadata);
          // create harvest report entry for the current record
          rp.createEntry(sourceUri.asString(), !bReplaced);
        } catch (NullReferenceException ex) {
          if (LOGGER.isLoggable(Level.FINEST)) {
            LOGGER.log(Level.FINEST, "[SYNCHRONIZER] FAILED processing metadata #" + (rp.getHarvestedCount() + 1) + " through: " + unit + ", source URI: " + sourceUri, ex);
          } else {
            LOGGER.finer("[SYNCHRONIZER] FAILED processing metadata #" + (rp.getHarvestedCount() + 1) + " through: " + unit + ", source URI: " + sourceUri + ", details: " +
                ex.getMessage());
          }
          rp.createInvalidEntry(sourceUri.asString(), Arrays.asList(new String[]{ex.getMessage()}));
          listener.onPublishException(unit.getRepository(), sourceUri, metadata, ex);
        } catch (ValidationException ex) {
          if (LOGGER.isLoggable(Level.FINEST)) {
            LOGGER.log(Level.FINEST, "[SYNCHRONIZER] FAILED processing metadata #" + (rp.getHarvestedCount() + 1) + " through: " + unit + ", source URI: " + sourceUri, ex);
          } else {
            LOGGER.finer("[SYNCHRONIZER] FAILED processing metadata #" + (rp.getHarvestedCount() + 1) + " through: " + unit + ", source URI: " + sourceUri + ", details: " + ex.getMessage());
          }
          ArrayList<String> messages = new ArrayList<String>();
          ex.getValidationErrors().buildMessages(messageBroker, messages, true);
          rp.createInvalidEntry(sourceUri.asString(), messages);
          listener.onPublishException(unit.getRepository(), sourceUri, metadata, ex);
        } catch (IllegalArgumentException ex) {
          if (LOGGER.isLoggable(Level.FINEST)) {
            LOGGER.log(Level.FINEST, "[SYNCHRONIZER] FAILED processing metadata #" + (rp.getHarvestedCount() + 1) + " through: " + unit + ", source URI: " + sourceUri, ex);
          } else {
            LOGGER.finer("[SYNCHRONIZER] FAILED processing metadata #" + (rp.getHarvestedCount() + 1) + " through: " + unit + ", source URI: " + sourceUri + ", details: " + ex.getMessage());
          }
          rp.createUnpublishedEntry(sourceUri.asString(), Arrays.asList(new String[]{ex.getMessage()}));
          listener.onPublishException(unit.getRepository(), sourceUri, metadata, ex);
        } catch (SchemaException ex) {
          if (LOGGER.isLoggable(Level.FINEST)) {
            LOGGER.log(Level.FINEST, "[SYNCHRONIZER] FAILED processing metadata #" + (rp.getHarvestedCount() + 1) + " through: " + unit + ", source URI: " + sourceUri, ex);
          } else {
            LOGGER.finer("[SYNCHRONIZER] FAILED processing metadata #" + (rp.getHarvestedCount() + 1) + " through: " + unit + ", source URI: " + sourceUri + ", details: " + ex.getMessage());
          }
          rp.createInvalidEntry(sourceUri.asString(), Arrays.asList(new String[]{ex.getMessage()}));
          listener.onPublishException(unit.getRepository(), sourceUri, metadata, ex);
        } catch (ImsServiceException ex) {
          if (LOGGER.isLoggable(Level.FINEST)) {
            LOGGER.log(Level.FINEST, "[SYNCHRONIZER] FAILED processing metadata #" + (rp.getHarvestedCount() + 1) + " through: " + unit + ", source URI: " + sourceUri, ex);
          } else {
            LOGGER.finer("[SYNCHRONIZER] FAILED processing metadata #" + (rp.getHarvestedCount() + 1) + " through: " + unit + ", source URI: " + sourceUri + ", details: " + ex.getMessage());
          }
          rp.createUnpublishedEntry(sourceUri.asString(), Arrays.asList(new String[]{ex.getMessage()}));
          listener.onPublishException(unit.getRepository(), sourceUri, metadata, ex);
        } catch (SAXException ex) {
          if (LOGGER.isLoggable(Level.FINEST)) {
            LOGGER.log(Level.FINEST, "[SYNCHRONIZER] FAILED processing metadata #" + (rp.getHarvestedCount() + 1) + " through: " + unit + ", source URI: " + sourceUri, ex);
          } else {
            LOGGER.finer("[SYNCHRONIZER] FAILED processing metadata #" + (rp.getHarvestedCount() + 1) + " through: " + unit + ", source URI: " + sourceUri + ", details: " + ex.getMessage());
          }
          rp.createInvalidEntry(sourceUri.asString(), Arrays.asList(new String[]{ex.getMessage()}));
          listener.onPublishException(unit.getRepository(), sourceUri, metadata, ex);
        } catch (TransformerConfigurationException ex) {
          throw ex;
        } catch (TransformerException ex) {
          if (LOGGER.isLoggable(Level.FINEST)) {
            LOGGER.log(Level.FINEST, "[SYNCHRONIZER] FAILED processing metadata #" + (rp.getHarvestedCount() + 1) + " through: " + unit + ", source URI: " + sourceUri, ex);
          } else {
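            // FINEST not enabled: log a one-line summary without the stack trace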
LOGGER.finer("[SYNCHRONIZER] FAILED processing metadata #" + (rp.getHarvestedCount() + 1) + " through: " + unit + ", source URI: " + sourceUri + ", details: " + ex.getMessage()); } rp.createInvalidEntry(sourceUri.asString(), Arrays.asList(new String[]{ex.getMessage()})); listener.onPublishException(unit.getRepository(), sourceUri, metadata, ex); } catch (HttpClientException ex) { if (LOGGER.isLoggable(Level.FINEST)) { LOGGER.log(Level.FINEST, "[SYNCHRONIZER] FAILED processing metadata #" + (rp.getHarvestedCount() + 1) + " through: " + unit + ", source URI: " + sourceUri, ex); } else { LOGGER.finer("[SYNCHRONIZER] FAILED processing metadata #" + (rp.getHarvestedCount() + 1) + " through: " + unit + ", source URI: " + sourceUri + ", details: " + ex.getMessage()); } rp.createInvalidEntry(sourceUri.asString(), Arrays.asList(new String[]{ex.getMessage()})); listener.onPublishException(unit.getRepository(), sourceUri, metadata, ex); } } } finally { context.onExecutionPhaseCompleted(); } } /** * Called upon iteration exception. * @param unit execution unit * @param ex exception */ @Override public void onIterationException(ExecutionUnit unit, Exception ex) { LOGGER.log(Level.SEVERE, "[SYNCHRONIZER] Iteration exception through: " + unit, ex); unit.setCleanupFlag(false); ExecutionUnitHelper helper = new ExecutionUnitHelper(unit); ReportBuilder rp = helper.getReportBuilder(); if (rp!=null) { rp.setException(ex); } listener.onIterationException(unit.getRepository(), ex); } /** * Collects existing source URI's for the given repository. * @param context request context * @param repository repository * @param sourceUris source URI's collector * @throws SQLException if accessing database fails * @throws IOException if accessing index fails */ private void collectExistingSourceURIs(RequestContext context, HrRecord repository, final SourceUriArray sourceUris) throws SQLException, IOException { CollectSourceUrisRequest request = new CollectSourceUrisRequest(context, repository) { @Override protected void onSourceUri(String sourceUri, String uuid) throws IOException { sourceUris.add(new String[]{sourceUri, uuid}); } }; request.execute(); } /** * Sends email notification about completed harvest. 
   * @param context request context
   * @param event harvest event
   */
  private void sendNotification(RequestContext context, HeRecord event) {
    try {
      // check repository record to see if notification has to be sent
      HrRecord record = event.getRepository();
      if (record.getSendNotification()) {
        // create email addresses storage
        ArrayList<String> emailAddresses = new ArrayList<String>();
        // get distinguished name for the record owner
        LocalDao localDao = new LocalDao(context);
        String uDN = localDao.readDN(record.getOwnerId());
        // obtain LDAP adapter
        IdentityAdapter ldapAdapter = context.newIdentityAdapter();
        // declare users
        Users users = new Users();
        // check if the owner is a group
        Group group = null;
        IdentityConfiguration idConfig = context.getIdentityConfiguration();
        Groups mgmtGroups = idConfig.getMetadataManagementGroups();
        if (mgmtGroups != null) {
          group = mgmtGroups.get(uDN);
        }
        if (group != null) {
          // read all members of the group
          users = ldapAdapter.readGroupMembers(uDN);
          for (User user : users.values()) {
            ldapAdapter.readUserProfile(user);
            String emailAddress = user.getProfile().getEmailAddress();
            if (emailAddress.length() > 0) {
              emailAddresses.add(emailAddress);
            }
          }
        } else {
          User user = new User();
          user.setDistinguishedName(uDN);
          ldapAdapter.readUserProfile(user);
          String emailAddress = user.getProfile().getEmailAddress();
          if (emailAddress.length() > 0) {
            emailAddresses.add(emailAddress);
          }
        }
        // if there is any address
        if (!emailAddresses.isEmpty()) {
          String link = baseContextPath.length() > 0 ? baseContextPath + "/catalog/harvest/report.page?uuid=" + Val.escapeXml(event.getUuid()) : "";
          String notification =
              "<?xml version=\"1.0\" encoding=\"UTF-8\"?>"
              + "<notification>"
              + "<eventId>" + Val.escapeXml(event.getUuid()) + "</eventId>"
              + "<eventDate>" + Val.escapeXml(event.getHarvestDate().toString()) + "</eventDate>"
              + "<repositoryId>" + Val.escapeXml(record.getUuid()) + "</repositoryId>"
              + "<repositoryName>" + Val.escapeXml(record.getName()) + "</repositoryName>"
              + "<reportLink>" + link + "</reportLink>"
              + "</notification>";
          // create notification subject and message using transformations
          XsltTemplate notifSubjectTemplate = XsltTemplate.makeFromResourcePath(NOTIF_SUBJECT_PATH);
          XsltTemplate notifMessageTemplate = XsltTemplate.makeFromResourcePath(NOTIF_MESSAGE_PATH);
          String notifSubject = notifSubjectTemplate.transform(notification);
          String notifMessage = notifMessageTemplate.transform(notification);
          // send email to each recipient
          for (String emailAddress : emailAddresses) {
            // create and init mail request
            MailRequest mailRequest = context.getMailConfiguration().newOutboundRequest();
            mailRequest.setMimeTypeHtml();
            mailRequest.setToAddress(emailAddress);
            mailRequest.setSubject(notifSubject);
            mailRequest.setBody(notifMessage);
            // send email notification
            mailRequest.send();
          }
        }
      }
    } catch (Exception ex) {
      LOGGER.log(Level.SEVERE, "[SYNCHRONIZER] Error sending email notification", ex);
    }
  }

  /**
   * Checks if a given sourceUri represents the repository.
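   * <p>Compares the protocol, host, port, and path of the repository host URL
   * with those of the source URI (case-insensitive); malformed URLs are
   * treated as a non-match.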
   * @param sourceUri source URI to check
   * @param record repository record
   * @return <code>true</code> if the source URI points to the repository itself
   */
  private boolean isRepositorySourceUri(SourceUri sourceUri, HrRecord record) {
    try {
      URL recUrl = new URL(record.getHostUrl());
      URL srcUrl = new URL(sourceUri.asString());
      return recUrl.getProtocol().equalsIgnoreCase(srcUrl.getProtocol())
          && recUrl.getHost().equalsIgnoreCase(srcUrl.getHost())
          && recUrl.getPort() == srcUrl.getPort()
          && recUrl.getPath().equalsIgnoreCase(srcUrl.getPath());
    } catch (MalformedURLException ex) {
      return false;
    }
  }

  /**
   * Specialized adapter converting SourceUriArray iterator into
   * Map.Entry<String, String> iterator.
   */
  private static class MapEntryIterator implements Iterator<Map.Entry<String, String>> {

    /** source URI's iterator */
    private Iterator<String[]> sourceUrisIterator;

    /**
     * Creates instance of the iterator.
     * @param collector collector
     */
    public MapEntryIterator(SourceUriArray collector) {
      this.sourceUrisIterator = collector.iterator();
    }

    @Override
    public boolean hasNext() {
      return sourceUrisIterator.hasNext();
    }

    @Override
    public Map.Entry<String, String> next() {
      // get the next element from the collector
      final String[] n = sourceUrisIterator.next();
      // create new Map.Entry
      return new Map.Entry<String, String>() {
        @Override
        public String getKey() {
          return n[0];
        }

        @Override
        public String getValue() {
          return n[1];
        }

        @Override
        public String setValue(String value) {
          String oldValue = n[1];
          n[1] = value;
          return oldValue;
        }
      };
    }

    @Override
    public void remove() {
      sourceUrisIterator.remove();
    }
  }
}
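/*
 * Lifecycle sketch (illustrative only): the harvester engine is assumed to
 * drive the DataProcessor callbacks roughly in this order for each execution unit.
 *
 *   processor.onStart(unit);                  // prepare report builder, publisher, cleanup set
 *   processor.onMetadata(unit, record);       // once for every harvested record
 *   processor.onIterationException(unit, ex); // if iterating the source fails
 *   processor.onEnd(unit, success);           // finalize report, cleanup, notification
 */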