/* Index ECM Engine - A system for managing the capture (when created * or received), classification (cataloguing), storage, retrieval, * revision, sharing, reuse and disposition of documents. * * Copyright (C) 2008 Regione Piemonte * Copyright (C) 2008 Provincia di Torino * Copyright (C) 2008 Comune di Torino * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2, * or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * */ package it.doqui.index.ecmengine.business.personalization.multirepository.node.index; import it.doqui.index.ecmengine.business.job.JobBusinessInterface; import it.doqui.index.ecmengine.business.personalization.multirepository.bootstrap.MultiTTenantAdminService; import it.doqui.index.ecmengine.business.personalization.multirepository.RepositoryManager; import it.doqui.index.ecmengine.business.personalization.multirepository.util.EcmEngineMultirepositoryConstants; import it.doqui.index.ecmengine.util.EcmEngineConstants; import java.util.ArrayList; import java.util.Date; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; import java.util.TreeMap; import org.alfresco.error.AlfrescoRuntimeException; import org.alfresco.repo.domain.Transaction; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; /** * Component to check and recover the indexes. 
* * <p> * Questo componente non aggiunge nulla all'implementazione originale. L'unica * differenza è nella superclasse, che in questo caso è stata personalizzata. * </p> * * @author Doqui */ public class IndexTransactionTracker extends AbstractReindexComponent { private static Log logger = LogFactory.getLog(EcmEngineMultirepositoryConstants.MULTIREPOSITORY_LOG_CATEGORY); private JobBusinessInterface jobManager; private long maxTxnDurationMs; private long reindexLagMs; private int maxRecordSetSize; // private boolean started; // private List<Long> previousTxnIds; private Map<String, List<Long>> previousTxnIdsMap; private long lastMaxTxnId; private Map<String, Long> lastMaxTxnIdMap; // private long fromTimeInclusive; // private Map<Long, TxnRecord> voids; private Map<String, Long> fromTimeInclusiveMap; private Map<String, Map<Long, TxnRecord>> voidsMap; /** * Set the defaults. * <ul> * <li><b>Maximum transaction duration:</b> 1 hour</li> * <li><b>Reindex lag:</b> 1 second</li> * <li><b>Maximum recordset size:</b> 1000</li> * </ul> */ public IndexTransactionTracker() { maxTxnDurationMs = 3600L * 1000L; reindexLagMs = 1000L; maxRecordSetSize = 1000; // previousTxnIds = Collections.<Long>emptyList(); previousTxnIdsMap = new HashMap<String, List<Long>>(); lastMaxTxnId = Long.MAX_VALUE; lastMaxTxnIdMap = new HashMap<String, Long>(); // fromTimeInclusive = -1L; fromTimeInclusiveMap = new HashMap<String, Long>(); // voids = new TreeMap<Long, TxnRecord>(); voidsMap = new HashMap<String, Map<Long,TxnRecord>>(); } /** * Set the expected maximum duration of transaction supported. This value is used to adjust the * look-back used to detect transactions that committed. Values must be greater than zero. 
* * @param maxTxnDurationMinutes the maximum length of time a transaction will take in minutes * * @since 1.4.5, 2.0.5, 2.1.1 */ public void setMaxTxnDurationMinutes(long maxTxnDurationMinutes) { if (maxTxnDurationMinutes < 1) { throw new AlfrescoRuntimeException("Maximum transaction duration must be at least one minute."); } this.maxTxnDurationMs = maxTxnDurationMinutes * 60L * 1000L; } /** * Transaction tracking should lag by the average commit time for a transaction. This will minimize * the number of holes in the transaction sequence. Values must be greater than zero. * * @param reindexLagMs the minimum age of a transaction to be considered by * the index transaction tracking * * @since 1.4.5, 2.0.5, 2.1.1 */ public void setReindexLagMs(long reindexLagMs) { if (reindexLagMs < 1) { throw new AlfrescoRuntimeException("Reindex lag must be at least 1 millisecond."); } this.reindexLagMs = reindexLagMs; } /** * Set the number of transactions to request per query. */ public void setMaxRecordSetSize(int maxRecordSetSize) { this.maxRecordSetSize = maxRecordSetSize; } @Override protected void reindexImpl() { // if (!started) // { // // Make sure that we start clean // voids.clear(); // previousTxnIds = new ArrayList<Long>(maxRecordSetSize); // lastMaxTxnId = Long.MAX_VALUE; // So that it is ignored at first // fromTimeInclusive = getStartingTxnCommitTime(); // started = true; // } // TODO: il tracking potrebbe essere sospeso solo sul repository sul quale e` in creazione il nuovo tenant. 
// Fermo l'IndexTransactionTracker se e' attivo il job di creazione dei tenant boolean isTenantBootstrapRunning = false; try { isTenantBootstrapRunning = jobManager.isExecuting(EcmEngineConstants.ECMENGINE_TENANT_ADMIN_JOB_REF); } catch (Exception e) { // Riportiamo semplicemente un warning logger.warn("[IndexTransactionTracker::reindexImpl] Exception accessing Job DAO on repository (" +RepositoryManager.getCurrentRepository() +")", e); } if (isTenantBootstrapRunning) { // ECM Engine sta eseguendo il bootstrap di un nuovo tenant. Sospendiamo momentaneamente l'index tracking. logger.info("[IndexTransactionTracker::reindexImpl] Tenant bootstrap running on repository (" +RepositoryManager.getCurrentRepository() +") skipping index tracking."); return; } // Fermo l'IndexTransactionTracker se e' attivo il job di delete dei tenant boolean isTenantDeleteRunning = false; try { isTenantDeleteRunning = jobManager.isExecuting(EcmEngineConstants.ECMENGINE_TENANT_DELETE_JOB_REF); } catch (Exception e) { // Riportiamo semplicemente un warning logger.warn("[IndexTransactionTracker::reindexImpl] Exception accessing Job DAO on repository (" +RepositoryManager.getCurrentRepository() +")", e); } if (isTenantDeleteRunning) { // ECM Engine sta eseguendo il bootstrap di un nuovo tenant. Sospendiamo momentaneamente l'index tracking. 
logger.info("[IndexTransactionTracker::reindexImpl] Tenant delete running on repository (" +RepositoryManager.getCurrentRepository() +") skipping index tracking."); return; } final String currentRepositoryId = RepositoryManager.getCurrentRepository(); if (lastMaxTxnIdMap.get(currentRepositoryId) != null) { lastMaxTxnId = lastMaxTxnIdMap.get(currentRepositoryId).longValue(); } else { // Need to initialize for current repo voidsMap.put(currentRepositoryId, new TreeMap<Long, TxnRecord>()); voidsMap.get(currentRepositoryId).clear(); previousTxnIdsMap.put(currentRepositoryId, new ArrayList<Long>(maxRecordSetSize)); lastMaxTxnId = Long.MAX_VALUE; // So that it is ignored at first fromTimeInclusiveMap.put(currentRepositoryId, new Long(getStartingTxnCommitTime())); } List<Long> curPreviousTxnIds = previousTxnIdsMap.get(currentRepositoryId); // List is mutable while (true) { long toTimeExclusive = System.currentTimeMillis() - reindexLagMs; // Check that the voids haven't been filled // fromTimeInclusive = checkVoids(fromTimeInclusive); long curFromTimeInclusive = fromTimeInclusiveMap.get(currentRepositoryId).longValue(); curFromTimeInclusive = checkVoids(curFromTimeInclusive); fromTimeInclusiveMap.put(currentRepositoryId, new Long(curFromTimeInclusive)); // get next transactions to index List<Transaction> txns = getNextTransactions(curFromTimeInclusive, toTimeExclusive, curPreviousTxnIds); if (logger.isDebugEnabled()) { //AF: Modificato per stampare anche le transazioni usate per la ricerca. /*String msg = String.format( "Reindexing %d transactions from %s (%s) to %s", txns.size(), (new Date(curFromTimeInclusive)).toString(), txns.isEmpty() ? "---" : txns.get(0).getId().toString(), (new Date(toTimeExclusive)).toString());*/ String msg = String.format( "Reindexing %d transactions from %s (%s) to %s", txns.size(), (new Date(curFromTimeInclusive)).toString() +" - " +curFromTimeInclusive, txns.isEmpty() ? 
"---" : txns.get(0).getId().toString(), (new Date(toTimeExclusive)).toString() +" - " +toTimeExclusive); logger.debug(msg); } // Reindex the transactions. Voids between the last set of transactions and this // set will be detected as well. Additionally, the last max transaction will be // updated by this method. reindexTransactions(txns); // Move the time on. // Note the subtraction here. Yes, it's odd. But the results of the getNextTransactions // may be limited by recordset size and it is possible to have multiple transactions share // the same commit time. If these txns get split up and we exclude the time period, then // they won't be requeried. The list of previously used transaction IDs is passed back to // be excluded from the next query. curFromTimeInclusive = toTimeExclusive - 1L; fromTimeInclusiveMap.put(currentRepositoryId, new Long(curFromTimeInclusive)); curPreviousTxnIds.clear(); for (Transaction txn : txns) { curPreviousTxnIds.add(txn.getId()); } // Break out if there were no transactions processed if (curPreviousTxnIds.isEmpty()) { break; } // break out if the VM is shutting down if (isShuttingDown()) { break; } } lastMaxTxnIdMap.put(currentRepositoryId, new Long(lastMaxTxnId)); } /** * Find a transaction time to start indexing from (inclusive). The last recorded transaction by ID * is taken and the max transaction duration substracted from its commit time. A transaction is * retrieved for this time and checked for indexing. If it is present, then that value is chosen. * If not, a step back in time is taken again. This goes on until there are no more transactions * or a transaction is found in the index. 
*/ protected long getStartingTxnCommitTime() { logger.info("[IndexTransactionTracker::getStartingTxnCommitTime] BEGIN on repository (" +RepositoryManager.getCurrentRepository() +")"); // Look back in time by the maximum transaction duration long toTimeExclusive = System.currentTimeMillis() - maxTxnDurationMs; long fromTimeInclusive = 0L; double stepFactor = 1.0D; found: while (true) { // Get the most recent transaction before the given look-back List<Transaction> nextTransactions = nodeDaoService.getTxnsByCommitTimeDescending( 0L, toTimeExclusive, 1, null); // There are no transactions in that time range if (nextTransactions.size() == 0) { break found; } // We found a transaction Transaction txn = nextTransactions.get(0); Long txnId = txn.getId(); long txnCommitTime = txn.getCommitTimeMs(); // Check that it is in the index InIndex txnInIndex = isTxnIdPresentInIndex(txnId); switch (txnInIndex) { case YES: fromTimeInclusive = txnCommitTime; break found; default: // Look further back in time. Step back by the maximum transaction duration and // increase this step back by a factor of 10% each iteration. toTimeExclusive = txnCommitTime - (long)(maxTxnDurationMs * stepFactor); stepFactor *= 1.1D; continue; } } logger.info("[IndexTransactionTracker::getStartingTxnCommitTime] END on repository (" +RepositoryManager.getCurrentRepository() +")"); // We have a starting value return fromTimeInclusive; } /** * Voids - otherwise known as 'holes' - in the transaction sequence are timestamped when they are * discovered. This method discards voids that were timestamped before the given date. It checks * all remaining voids, passing back the transaction time for the newly-filled void. Otherwise * the value passed in is passed back. 
     *
     * @param fromTimeInclusive the oldest void to consider
     * @return Returns an adjusted start position based on any voids being filled
     */
    private long checkVoids(long fromTimeInclusive)
    {
        // Voids older than this cut-off are expired rather than re-checked.
        long maxHistoricalTime = (fromTimeInclusive - maxTxnDurationMs);
        long fromTimeAdjusted = fromTimeInclusive;

        Map<Long, TxnRecord> curVoids = voidsMap.get(RepositoryManager.getCurrentRepository());   // Map is mutable

        List<Long> toExpireTxnIds = new ArrayList<Long>(1);
        // The voids are stored in a sorted map, sorted by the txn ID
        for (Long voidTxnId : curVoids.keySet())
        {
            TxnRecord voidTxnRecord = curVoids.get(voidTxnId);
            // Is the transaction around, yet?
            Transaction voidTxn = nodeDaoService.getTxnById(voidTxnId);
            if (voidTxn == null)
            {
                // It's still just a void.  Shall we expire it?
                if (voidTxnRecord.txnCommitTime < maxHistoricalTime)
                {
                    // It's too late for this void
                    toExpireTxnIds.add(voidTxnId);
                }
                continue;
            }
            else
            {
                if (logger.isDebugEnabled())
                {
                    logger.debug("Void has become live: " + voidTxn);
                }
                // MB (2009-10-19, 11:00:50): sometimes the next line fails when the commit
                // time in the alf_transaction table is NULL (unboxing a null Long).  The
                // circumstances need further analysis to prevent it from happening.
                // Alfresco 3.x introduced the following guard for this case; it is kept
                // commented out here to preserve the current behaviour:
                //if (voidTxn.getCommitTimeMs() == null)    // Just coping with Hibernate mysteries
                //{
                //continue;
                //}
                // We found one that has become a real transaction.
                // We don't throw the other voids away.
                fromTimeAdjusted = voidTxn.getCommitTimeMs();
                // Break out as sequential rebuilding is required
                break;
            }
        }
        // Throw away all the expired ones
        for (Long toExpireTxnId : toExpireTxnIds)
        {
            curVoids.remove(toExpireTxnId);
            if (logger.isDebugEnabled())
            {
                logger.debug("Void has expired: " + toExpireTxnId);
            }
        }
        // Done
        return fromTimeAdjusted;
    }

    /**
     * Queries the DAO for the next batch of transactions to index, excluding transactions
     * already seen in the previous iteration, and logs the batch at debug level.
     *
     * @param fromTimeInclusive the start of the commit-time window (inclusive)
     * @param toTimeExclusive   the end of the commit-time window (exclusive)
     * @param previousTxnIds    transaction IDs to exclude from the query
     * @return the transactions found, ordered by commit time ascending
     */
    private List<Transaction> getNextTransactions(long fromTimeInclusive, long toTimeExclusive, List<Long> previousTxnIds)
    {
        List<Transaction> txns = nodeDaoService.getTxnsByCommitTimeAscending(
                fromTimeInclusive, toTimeExclusive, maxRecordSetSize, previousTxnIds);

        if (logger.isDebugEnabled())
        {
            logger.debug(String.format("Got transactions from \"%s\" to \"%s\":",
                    (new Date(fromTimeInclusive)).toString(),
                    (new Date(toTimeExclusive)).toString()));

            for (Transaction txn : txns)
            {
                logger.debug(String.format("\t\tID: %s - Server: %s - Commit time: %s",
                        txn.getId(),
                        txn.getServer().getIpAddress(),
                        (new Date(txn.getCommitTimeMs())).toString()));
            }
        }
        // done
        return txns;
    }

    /**
     * Checks that each of the transactions is present in the index.  As soon as one is found that
     * isn't, all the following transactions will be reindexed.  After the reindexing, the sequence
     * of transaction IDs will be examined for any voids.  These will be recorded.
     *
     * <p>Side effect: updates the shared {@code lastMaxTxnId} field and the per-repository void
     * map for the current repository.</p>
     *
     * @param txns transactions ordered by time ascending
     */
    private void reindexTransactions(List<Transaction> txns)
    {
        if (txns.isEmpty())
        {
            return;
        }

        Set<Long> processedTxnIds = new HashSet<Long>(13);

        Map<Long, TxnRecord> curVoids = voidsMap.get(RepositoryManager.getCurrentRepository());   // Map is mutable

        boolean forceReindex = false;
        long minNewTxnId = Long.MAX_VALUE;
        long maxNewTxnId = Long.MIN_VALUE;
        long maxNewTxnCommitTime = System.currentTimeMillis();
        for (Transaction txn : txns)
        {
            Long txnId = txn.getId();
            long txnIdLong = txnId.longValue();
            if (txnIdLong < minNewTxnId)
            {
                minNewTxnId = txnIdLong;
            }
            if (txnIdLong > maxNewTxnId)
            {
                maxNewTxnId = txnIdLong;
                // NOTE(review): unboxes txn.getCommitTimeMs() — NPE if the DB commit time is
                // NULL (same landmine as noted in checkVoids); confirm against the schema.
                maxNewTxnCommitTime = txn.getCommitTimeMs();
            }
            // Keep track of it for void checking
            processedTxnIds.add(txnId);
            // Remove this entry from the void list - it is not void
            curVoids.remove(txnId);
            // Reindex the transaction if we are forcing it or if it isn't in the index already
            if (forceReindex || isTxnIdPresentInIndex(txnId) == InIndex.NO)
            {
                // Any indexing means that all the next transactions have to be indexed
                forceReindex = true;
                try
                {
                    if (logger.isDebugEnabled())
                    {
                        logger.debug("Reindexing transaction: " + txn);
                    }
                    // We try the reindex, but for the sake of continuity, have to let it run on
                    reindexTransaction(txnId);
                }
                catch (Throwable e)
                {
                    // Deliberately broad catch: one bad transaction must not stop the batch.
                    logger.warn("\n" +
                            "Reindex of transaction failed: \n" +
                            " Transaction ID: " + txnId + "\n" +
                            " Error: " + e.getMessage(),
                            e);
                }
            }
            else
            {
                if (logger.isDebugEnabled())
                {
                    logger.debug("Reindex skipping transaction: " + txn);
                }
            }
        }
        // We have to search for voids now.  Don't start at the min transaction,
        // but start at the least of the lastMaxTxnId and minNewTxnId
        long voidCheckStartTxnId = (lastMaxTxnId < minNewTxnId ? lastMaxTxnId : minNewTxnId) + 1;
        long voidCheckEndTxnId = maxNewTxnId;
        // Check for voids in new transactions
        for (long i = voidCheckStartTxnId; i <= voidCheckEndTxnId; i++)
        {
            Long txnId = Long.valueOf(i);
            if (processedTxnIds.contains(txnId))
            {
                // It is there
                continue;
            }

            // First make sure that it is a real void.  Sometimes, transactions are in the table but don't
            // fall within the commit time window that we queried.  If they're in the DB AND in the index,
            // then they're not really voids and don't need further checks.  If they're missing from either,
            // then they're voids and must be processed.
            Transaction voidTxn = nodeDaoService.getTxnById(txnId);
            if (voidTxn != null && isTxnIdPresentInIndex(txnId) != InIndex.NO)
            {
                // It is a real transaction (not a void) and is already in the index, so just ignore it.
                continue;
            }

            // Calculate an age for the void.  We can't use the current time as that will mean we keep all
            // discovered voids, even if they are very old.  Rather, we use the commit time of the last transaction
            // in the set as it represents the query time for this iteration.
            TxnRecord voidRecord = new TxnRecord();
            voidRecord.txnCommitTime = maxNewTxnCommitTime;
            curVoids.put(txnId, voidRecord);
            if (logger.isDebugEnabled())
            {
                logger.debug("Void detected: " + txnId);
            }
        }
        // Having searched for the nodes, we've recorded all the voids.  So move the lastMaxTxnId up.
        lastMaxTxnId = voidCheckEndTxnId;
    }

    /** Timestamped record of a detected void (hole) in the transaction ID sequence. */
    private class TxnRecord
    {
        // Commit time of the batch in which the void was discovered; used for expiry.
        private long txnCommitTime;
    }

    /** @return the job business delegate used to detect tenant bootstrap/delete jobs */
    public JobBusinessInterface getJobManager() {
        return jobManager;
    }

    /** @param jobManager the job business delegate used to detect tenant bootstrap/delete jobs */
    public void setJobManager(JobBusinessInterface jobManager) {
        this.jobManager = jobManager;
    }

    /** @param tenantAdminService the multi-repository tenant admin service (inherited field) */
    public void setTenantAdminService(MultiTTenantAdminService tenantAdminService) {
        this.tenantAdminService = tenantAdminService;
    }
}