/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.jackrabbit.core.data; import java.io.BufferedReader; import java.io.BufferedWriter; import java.io.File; import java.io.FileWriter; import java.io.InputStream; import java.io.InputStreamReader; import java.io.OutputStreamWriter; import java.util.ArrayList; import java.util.Calendar; import java.util.Iterator; import java.util.concurrent.locks.ReentrantLock; import javax.jcr.RepositoryException; import org.apache.commons.io.IOUtils; import org.apache.jackrabbit.core.fs.FileSystem; import org.apache.jackrabbit.core.fs.FileSystemException; import org.apache.jackrabbit.core.fs.FileSystemResource; import org.apache.jackrabbit.core.fs.local.LocalFileSystem; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * A MultiDataStore can handle two independent DataStores. * <p> * <b>Attention:</b> You will lost the global single instance mechanism ! * </p> * It can be used if you have two storage systems. One for fast access and a * other one like a archive DataStore on a slower storage system. All Files will * be added to the primary DataStore. On read operations first the primary * dataStore will be used and if no Record is found the archive DataStore will * be used. The GarabageCollector will only remove files from the archive * DataStore. * <p> * The internal MoveDataTask will be started automatically and could be * configured with the following properties. * <p> * The Configuration: * * <pre> * <DataStore class="org.apache.jackrabbit.core.data.MultiDataStore"> * <param name="{@link #setMaxAge(int) maxAge}" value="60"/> * <param name="{@link #setMoveDataTaskSleep(int) moveDataTaskSleep}" value="604800"/> * <param name="{@link #setMoveDataTaskFirstRunHourOfDay(int) moveDataTaskFirstRunHourOfDay}" value="1"/> * <param name="{@link #setSleepBetweenRecords(long) sleepBetweenRecords}" value="100"/> * <param name="{@link #setDelayedDelete(boolean) delayedDelete}" value="false"/> * <param name="{@link #setDelayedDeleteSleep(long) delayedDeleteSleep}" value="86400"/> * <param name="primary" value="org.apache.jackrabbit.core.data.db.DbDataStore"> * <param .../> * </param> * <param name="archive" value="org.apache.jackrabbit.core.data.FileDataStore"> * <param .../> * </param> * </DataStore> * </pre> * * <ul> * <li><code>maxAge</code>: defines how many days the content will reside in the * primary data store. DataRecords that have been added before this time span * will be moved to the archive data store. (default = <code>60</code>)</li> * <li><code>moveDataTaskSleep</code>: specifies the sleep time of the * moveDataTaskThread in seconds. (default = 60 * 60 * 24 * 7, which equals 7 * days)</li> * <li><code>moveDataTaskNextRunHourOfDay</code>: specifies the hour at which * the moveDataTaskThread initiates its first run (default = <code>1</code> * which means 01:00 at night)</li> * <li><code>sleepBetweenRecords</code>: specifies the delay in milliseconds * between scanning data records (default = <code>100</code>)</li> * <li><code>delayedDelete</code>: its possible to delay the delete operation on * the primary data store. The DataIdentifiers will be written to a temporary * file. The file will be processed after a defined sleep (see * <code>delayedDeleteSleep</code>) It's useful if you like to create a snapshot * of the primary data store backend in the meantime before the data will be * deleted. (default = <code>false</code>)</li> * <li><code>delayedDeleteSleep</code>: specifies the sleep time of the * delayedDeleteTaskThread in seconds. (default = 60 * 60 * 24, which equals 1 * day). This means the delayed delete from the primary data store will be * processed after one day.</li> * </ul> */ public class MultiDataStore implements DataStore { /** * Logger instance */ private static Logger log = LoggerFactory.getLogger(MultiDataStore.class); private DataStore primaryDataStore; private DataStore archiveDataStore; /** * Max Age in days. */ private int maxAge = 60; /** * ReentrantLock that is used while the MoveDataTask is running. */ private ReentrantLock moveDataTaskLock = new ReentrantLock(); private boolean moveDataTaskRunning = false; private Thread moveDataTaskThread; /** * The sleep time in seconds of the MoveDataTask, 7 day default. */ private int moveDataTaskSleep = 60 * 60 * 24 * 7; /** * Indicates when the next run of the move task is scheduled. The first run * is scheduled by default at 01:00 hours. */ private Calendar moveDataTaskNextRun = Calendar.getInstance(); /** * Its possible to delay the delete operation on the primary data store * while move task is running. The delete will be executed after defined * delayDeleteSleep. */ private boolean delayedDelete = false; /** * The sleep time in seconds to delay remove operation on the primary data * store, 1 day default. */ private long delayedDeleteSleep = 60 * 60 * 24; /** * File that holds the data identifiers if delayDelete is enabled. */ private FileSystemResource identifiersToDeleteFile = null; private Thread deleteDelayedIdentifiersTaskThread; /** * Name of the file which holds the identifiers if deleayed delete is * enabled */ private final String IDENTIFIERS_TO_DELETE_FILE_KEY = "identifiersToDelete"; /** * The delay time in milliseconds between scanning data records, 100 * default. */ private long sleepBetweenRecords = 100; { if (moveDataTaskNextRun.get(Calendar.HOUR_OF_DAY) >= 1) { moveDataTaskNextRun.add(Calendar.DAY_OF_MONTH, 1); } moveDataTaskNextRun.set(Calendar.HOUR_OF_DAY, 1); moveDataTaskNextRun.set(Calendar.MINUTE, 0); moveDataTaskNextRun.set(Calendar.SECOND, 0); moveDataTaskNextRun.set(Calendar.MILLISECOND, 0); } /** * Setter for the primary dataStore * * @param dataStore */ public void setPrimaryDataStore(DataStore dataStore) { this.primaryDataStore = dataStore; } /** * Setter for the archive dataStore * * @param dataStore */ public void setArchiveDataStore(DataStore dataStore) { this.archiveDataStore = dataStore; } /** * Check if a record for the given identifier exists in the primary data * store. If not found there it will be returned from the archive data * store. If no record exists, this method returns null. * * @param identifier * data identifier * @return the record if found, and null if not */ public DataRecord getRecordIfStored(DataIdentifier identifier) throws DataStoreException { if (moveDataTaskRunning) { moveDataTaskLock.lock(); } try { DataRecord dataRecord = primaryDataStore.getRecordIfStored(identifier); if (dataRecord == null) { dataRecord = archiveDataStore.getRecordIfStored(identifier); } return dataRecord; } finally { if (moveDataTaskRunning) { moveDataTaskLock.unlock(); } } } /** * Returns the identified data record from the primary data store. If not * found there it will be returned from the archive data store. The given * identifier should be the identifier of a previously saved data record. * Since records are never removed, there should never be cases where the * identified record is not found. Abnormal cases like that are treated as * errors and handled by throwing an exception. * * @param identifier * data identifier * @return identified data record * @throws DataStoreException * if the data store could not be accessed, or if the given * identifier is invalid */ public DataRecord getRecord(DataIdentifier identifier) throws DataStoreException { if (moveDataTaskRunning) { moveDataTaskLock.lock(); } try { return primaryDataStore.getRecord(identifier); } catch (DataStoreException e) { return archiveDataStore.getRecord(identifier); } finally { if (moveDataTaskRunning) { moveDataTaskLock.unlock(); } } } /** * Creates a new data record in the primary data store. The given binary * stream is consumed and a binary record containing the consumed stream is * created and returned. If the same stream already exists in another * record, then that record is returned instead of creating a new one. * <p> * The given stream is consumed and <strong>not closed</strong> by this * method. It is the responsibility of the caller to close the stream. A * typical call pattern would be: * * <pre> * InputStream stream = ...; * try { * record = store.addRecord(stream); * } finally { * stream.close(); * } * </pre> * * @param stream * binary stream * @return data record that contains the given stream * @throws DataStoreException * if the data store could not be accessed */ public DataRecord addRecord(InputStream stream) throws DataStoreException { return primaryDataStore.addRecord(stream); } /** * From now on, update the modified date of an object even when accessing it * in the archive data store. Usually, the modified date is only updated * when creating a new object, or when a new link is added to an existing * object. When this setting is enabled, even getLength() will update the * modified date. * * @param before * - update the modified date to the current time if it is older * than this value */ public void updateModifiedDateOnAccess(long before) { archiveDataStore.updateModifiedDateOnAccess(before); } /** * Delete objects that have a modified date older than the specified date * from the archive data store. * * @param min * the minimum time * @return the number of data records deleted * @throws DataStoreException */ public int deleteAllOlderThan(long min) throws DataStoreException { return archiveDataStore.deleteAllOlderThan(min); } /** * Get all identifiers from the archive data store. * * @return an iterator over all DataIdentifier objects * @throws DataStoreException * if the list could not be read */ public Iterator<DataIdentifier> getAllIdentifiers() throws DataStoreException { return archiveDataStore.getAllIdentifiers(); } public DataRecord getRecordFromReference(String reference) throws DataStoreException { DataRecord record = primaryDataStore.getRecordFromReference(reference); if (record == null) { record = archiveDataStore.getRecordFromReference(reference); } return record; } /** * {@inheritDoc} */ public void init(String homeDir) throws RepositoryException { if (delayedDelete) { // First initialize the identifiersToDeleteFile LocalFileSystem fileSystem = new LocalFileSystem(); fileSystem.setRoot(new File(homeDir)); identifiersToDeleteFile = new FileSystemResource(fileSystem, FileSystem.SEPARATOR + IDENTIFIERS_TO_DELETE_FILE_KEY); } moveDataTaskThread = new Thread(new MoveDataTask(), "Jackrabbit-MulitDataStore-MoveDataTaskThread"); moveDataTaskThread.setDaemon(true); moveDataTaskThread.start(); log.info("MultiDataStore-MoveDataTask thread started; first run scheduled at " + moveDataTaskNextRun.getTime()); if (delayedDelete) { try { // Run on startup the DeleteDelayedIdentifiersTask only if the // file exists and modify date is older than the // delayedDeleteSleep timeout ... if (identifiersToDeleteFile != null && identifiersToDeleteFile.exists() && (identifiersToDeleteFile.lastModified() + (delayedDeleteSleep * 1000)) < System .currentTimeMillis()) { deleteDelayedIdentifiersTaskThread = new Thread( //Start immediately ... new DeleteDelayedIdentifiersTask(0L), "Jackrabbit-MultiDataStore-DeleteDelayedIdentifiersTaskThread"); deleteDelayedIdentifiersTaskThread.setDaemon(true); deleteDelayedIdentifiersTaskThread.start(); log.info("Old entries in the " + IDENTIFIERS_TO_DELETE_FILE_KEY + " File found. DeleteDelayedIdentifiersTask-Thread started now."); } } catch (FileSystemException e) { throw new RepositoryException("I/O error while reading from '" + identifiersToDeleteFile.getPath() + "'", e); } } } /** * Get the minimum size of an object that should be stored in the primary * data store. * * @return the minimum size in bytes */ public int getMinRecordLength() { return primaryDataStore.getMinRecordLength(); } /** * {@inheritDoc} */ public void close() throws DataStoreException { DataStoreException lastException = null; // 1. close the primary data store try { primaryDataStore.close(); } catch (DataStoreException e) { lastException = e; } // 2. close the archive data store try { archiveDataStore.close(); } catch (DataStoreException e) { if (lastException != null) { lastException = new DataStoreException(lastException); } } // 3. if moveDataTaskThread is running interrupt it try { if (moveDataTaskRunning) { moveDataTaskThread.interrupt(); } } catch (Exception e) { if (lastException != null) { lastException = new DataStoreException(lastException); } } // 4. if deleteDelayedIdentifiersTaskThread is running interrupt it try { if (deleteDelayedIdentifiersTaskThread != null && deleteDelayedIdentifiersTaskThread.isAlive()) { deleteDelayedIdentifiersTaskThread.interrupt(); } } catch (Exception e) { if (lastException != null) { lastException = new DataStoreException(lastException); } } if (lastException != null) { throw lastException; } } /** * {@inheritDoc} */ public void clearInUse() { archiveDataStore.clearInUse(); } public int getMaxAge() { return maxAge; } public void setMaxAge(int maxAge) { this.maxAge = maxAge; } public int getMoveDataTaskSleep() { return moveDataTaskSleep; } public int getMoveDataTaskFirstRunHourOfDay() { return moveDataTaskNextRun.get(Calendar.HOUR_OF_DAY); } public void setMoveDataTaskSleep(int sleep) { this.moveDataTaskSleep = sleep; } public void setMoveDataTaskFirstRunHourOfDay(int hourOfDay) { moveDataTaskNextRun = Calendar.getInstance(); if (moveDataTaskNextRun.get(Calendar.HOUR_OF_DAY) >= hourOfDay) { moveDataTaskNextRun.add(Calendar.DAY_OF_MONTH, 1); } moveDataTaskNextRun.set(Calendar.HOUR_OF_DAY, hourOfDay); moveDataTaskNextRun.set(Calendar.MINUTE, 0); moveDataTaskNextRun.set(Calendar.SECOND, 0); moveDataTaskNextRun.set(Calendar.MILLISECOND, 0); } public void setSleepBetweenRecords(long millis) { this.sleepBetweenRecords = millis; } public long getSleepBetweenRecords() { return sleepBetweenRecords; } public boolean isDelayedDelete() { return delayedDelete; } public void setDelayedDelete(boolean delayedDelete) { this.delayedDelete = delayedDelete; } public long getDelayedDeleteSleep() { return delayedDeleteSleep; } public void setDelayedDeleteSleep(long delayedDeleteSleep) { this.delayedDeleteSleep = delayedDeleteSleep; } /** * Writes the given DataIdentifier to the delayedDeletedFile. * * @param identifier * @return boolean true if it was successful otherwise false */ private boolean writeDelayedDataIdentifier(DataIdentifier identifier) { BufferedWriter writer = null; try { File identifierFile = new File( ((LocalFileSystem) identifiersToDeleteFile.getFileSystem()).getPath(), identifiersToDeleteFile.getPath()); writer = new BufferedWriter(new FileWriter(identifierFile, true)); writer.write(identifier.toString()); return true; } catch (Exception e) { log.warn("I/O error while saving DataIdentifier (stacktrace on DEBUG log level) to '" + identifiersToDeleteFile.getPath() + "': " + e.getMessage()); log.debug("Root cause: ", e); return false; } finally { IOUtils.closeQuietly(writer); } } /** * Purges the delayedDeletedFile. * * @return boolean true if it was successful otherwise false */ private boolean purgeDelayedDeleteFile() { BufferedWriter writer = null; try { writer = new BufferedWriter(new OutputStreamWriter( identifiersToDeleteFile.getOutputStream())); writer.write(""); return true; } catch (Exception e) { log.warn("I/O error while purging (stacktrace on DEBUG log level) the " + IDENTIFIERS_TO_DELETE_FILE_KEY + " file '" + identifiersToDeleteFile.getPath() + "': " + e.getMessage()); log.debug("Root cause: ", e); return false; } finally { IOUtils.closeQuietly(writer); } } /** * Class for maintaining the MultiDataStore. It will be used to move the * content of the primary data store to the archive data store. */ public class MoveDataTask implements Runnable { /** * {@inheritDoc} */ public void run() { while (!Thread.currentThread().isInterrupted()) { try { log.info("Next move-data task run scheduled at " + moveDataTaskNextRun.getTime()); long sleepTime = moveDataTaskNextRun.getTimeInMillis() - System.currentTimeMillis(); if (sleepTime > 0) { Thread.sleep(sleepTime); } moveDataTaskRunning = true; moveOutdatedData(); moveDataTaskRunning = false; moveDataTaskNextRun.add(Calendar.SECOND, moveDataTaskSleep); if (delayedDelete) { if (deleteDelayedIdentifiersTaskThread != null && deleteDelayedIdentifiersTaskThread.isAlive()) { log.warn("The DeleteDelayedIdentifiersTask-Thread is already running."); } else { deleteDelayedIdentifiersTaskThread = new Thread( new DeleteDelayedIdentifiersTask(delayedDeleteSleep), "Jackrabbit-MultiDataStore-DeleteDelayedIdentifiersTaskThread"); deleteDelayedIdentifiersTaskThread.setDaemon(true); deleteDelayedIdentifiersTaskThread.start(); } } } catch (InterruptedException e) { Thread.currentThread().interrupt(); } } log.warn("Interrupted: stopping move-data task."); } /** * Moves outdated data from primary to archive data store */ protected void moveOutdatedData() { try { long now = System.currentTimeMillis(); long maxAgeMilli = 1000L * 60 * 60 * 24 * maxAge; log.debug("Collecting all Identifiers from PrimaryDataStore..."); Iterator<DataIdentifier> allIdentifiers = primaryDataStore.getAllIdentifiers(); int moved = 0; while (allIdentifiers.hasNext()) { DataIdentifier identifier = allIdentifiers.next(); DataRecord dataRecord = primaryDataStore.getRecord(identifier); if ((dataRecord.getLastModified() + maxAgeMilli) < now) { try { moveDataTaskLock.lock(); if (delayedDelete) { // first write it to the file and then add it to // the archive data store ... if (writeDelayedDataIdentifier(identifier)) { archiveDataStore.addRecord(dataRecord.getStream()); moved++; } } else { // first add it and then delete it .. not really // atomic ... archiveDataStore.addRecord(dataRecord.getStream()); ((MultiDataStoreAware) primaryDataStore).deleteRecord(identifier); moved++; } if (moved % 100 == 0) { log.debug("Moving DataRecord's... ({})", moved); } } catch (DataStoreException e) { log.error("Failed to move DataRecord. DataIdentifier: " + identifier, e); } finally { moveDataTaskLock.unlock(); } } // Give other threads time to use the MultiDataStore while // MoveDataTask is running.. Thread.sleep(sleepBetweenRecords); } if (delayedDelete) { log.info("Moved " + moved + " DataRecords to the archive data store. The DataRecords in the primary data store will be removed in " + delayedDeleteSleep + " seconds."); } else { log.info("Moved " + moved + " DataRecords to the archive data store."); } } catch (Exception e) { log.warn("Failed to run move-data task.", e); } } } /** * Class to clean up the delayed DataRecords from the primary data store. */ public class DeleteDelayedIdentifiersTask implements Runnable { boolean run = true; private long sleepTime = 0L; /** * Constructor * @param sleep how long this DeleteDelayedIdentifiersTask should sleep in seconds. */ public DeleteDelayedIdentifiersTask(long sleep) { this.sleepTime = (sleep * 1000L); } @Override public void run() { if (moveDataTaskRunning) { log.warn("It's not supported to run the DeleteDelayedIdentifiersTask while the MoveDataTask is running."); return; } while (run && !Thread.currentThread().isInterrupted()) { if (sleepTime > 0) { try { Thread.sleep(sleepTime); } catch (InterruptedException e) { Thread.currentThread().interrupt(); } } log.info("Start to delete DataRecords from the primary data store."); BufferedReader reader = null; ArrayList<DataIdentifier> problemIdentifiers = new ArrayList<DataIdentifier>(); try { int deleted = 0; reader = new BufferedReader(new InputStreamReader( identifiersToDeleteFile.getInputStream())); while (true) { String s = reader.readLine(); if (s == null || s.equals("")) { break; } DataIdentifier identifier = new DataIdentifier(s); try { moveDataTaskLock.lock(); ((MultiDataStoreAware) primaryDataStore).deleteRecord(identifier); deleted++; } catch (DataStoreException e) { log.error("Failed to delete DataRecord. DataIdentifier: " + identifier, e); problemIdentifiers.add(identifier); } finally { moveDataTaskLock.unlock(); } // Give other threads time to use the MultiDataStore // while // DeleteDelayedIdentifiersTask is running.. Thread.sleep(sleepBetweenRecords); } log.info("Deleted " + deleted + " DataRecords from the primary data store."); if (problemIdentifiers.isEmpty()) { try { identifiersToDeleteFile.delete(); } catch (FileSystemException e) { log.warn("Unable to delete the " + IDENTIFIERS_TO_DELETE_FILE_KEY + " File."); if (!purgeDelayedDeleteFile()) { log.error("Unable to purge the " + IDENTIFIERS_TO_DELETE_FILE_KEY + " File."); } } } else { if (purgeDelayedDeleteFile()) { for (int x = 0; x < problemIdentifiers.size(); x++) { writeDelayedDataIdentifier(problemIdentifiers.get(x)); } } } } catch (InterruptedException e) { log.warn("Interrupted: stopping delayed-delete task."); Thread.currentThread().interrupt(); } catch (Exception e) { log.warn("Failed to run delayed-delete task.", e); } finally { IOUtils.closeQuietly(reader); run = false; } } } } }