/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.ignite.spi.checkpoint.sharedfs; import java.io.File; import java.io.FileFilter; import java.io.IOException; import java.util.Collection; import java.util.HashMap; import java.util.LinkedList; import java.util.Map; import java.util.Queue; import org.apache.ignite.Ignite; import org.apache.ignite.IgniteCheckedException; import org.apache.ignite.IgniteLogger; import org.apache.ignite.internal.binary.BinaryMarshaller; import org.apache.ignite.internal.util.typedef.F; import org.apache.ignite.internal.util.typedef.internal.A; import org.apache.ignite.internal.util.typedef.internal.S; import org.apache.ignite.internal.util.typedef.internal.SB; import org.apache.ignite.internal.util.typedef.internal.U; import org.apache.ignite.marshaller.Marshaller; import org.apache.ignite.marshaller.MarshallerUtils; import org.apache.ignite.resources.IgniteInstanceResource; import org.apache.ignite.resources.LoggerResource; import org.apache.ignite.spi.IgniteSpiAdapter; import org.apache.ignite.spi.IgniteSpiConfiguration; import org.apache.ignite.spi.IgniteSpiConsistencyChecked; import org.apache.ignite.spi.IgniteSpiException; import org.apache.ignite.spi.IgniteSpiMBeanAdapter; import org.apache.ignite.spi.IgniteSpiMultipleInstancesSupport; import org.apache.ignite.spi.checkpoint.CheckpointListener; import org.apache.ignite.spi.checkpoint.CheckpointSpi; import org.jetbrains.annotations.Nullable; /** * This class defines shared file system {@link org.apache.ignite.spi.checkpoint.CheckpointSpi} implementation for * checkpoint SPI. All checkpoints are stored on shared storage and available for all * nodes in the grid. Note that every node must have access to the shared directory. The * reason the directory needs to be {@code shared} is because a job state * can be saved on one node and loaded on another (e.g. if a job gets * preempted on a different node after node failure). When started, this SPI tracks * all checkpoints saved by localhost for expiration. Note that this SPI does not * cache data stored in checkpoints - all the data is loaded from file system * on demand. * <p> * Directory paths for shared checkpoints should either be empty or contain previously * stored checkpoint files. * <p> * <h1 class="header">Configuration</h1> * <h2 class="header">Mandatory</h2> * This SPI has no mandatory configuration parameters. * <h2 class="header">Optional</h2> * This SPI has following optional configuration parameters: * <ul> * <li>Directory paths (see {@link #setDirectoryPaths(Collection)})</li> * </ul> * <h2 class="header">Java Example</h2> * {@link SharedFsCheckpointSpi} can be configured as follows: * <pre name="code" class="java"> * IgniteConfiguration cfg = new IgniteConfiguration(); * * SharedFsCheckpointSpi checkpointSpi = new SharedFsCheckpointSpi(); * * // List of checkpoint directories where all files are stored. * Collection<String> dirPaths = new ArrayList<String>(); * * dirPaths.add("/my/directory/path"); * dirPaths.add("/other/directory/path"); * * // Override default directory path. * checkpointSpi.setDirectoryPaths(dirPaths); * * // Override default checkpoint SPI. * cfg.setCheckpointSpi(checkpointSpi); * * // Starts grid. * G.start(cfg); * </pre> * <h2 class="header">Spring Example</h2> * {@link SharedFsCheckpointSpi} can be configured from Spring XML configuration file: * <pre name="code" class="xml"> * <bean id="grid.custom.cfg" class="org.apache.ignite.configuration.IgniteConfiguration" singleton="true"> * ... * <property name="checkpointSpi"> * <bean class="org.apache.ignite.spi.checkpoint.sharedfs.GridSharedFsCheckpointSpi"> * <!-- Change to shared directory path in your environment. --> * <property name="directoryPaths"> * <list> * <value>/my/directory/path</value> * <value>/other/directory/path</value> * </list> * </property> * </bean> * </property> * ... * </bean> * </pre> * <p> * <img src="http://ignite.apache.org/images/spring-small.png"> * <br> * For information about Spring framework visit <a href="http://www.springframework.org/">www.springframework.org</a> * @see org.apache.ignite.spi.checkpoint.CheckpointSpi */ @IgniteSpiMultipleInstancesSupport(true) @IgniteSpiConsistencyChecked(optional = false) public class SharedFsCheckpointSpi extends IgniteSpiAdapter implements CheckpointSpi { /** * Default checkpoint directory. Note that this path is relative to {@code IGNITE_HOME/work} folder * if {@code IGNITE_HOME} system or environment variable specified, otherwise it is relative to * {@code work} folder under system {@code java.io.tmpdir} folder. * * @see org.apache.ignite.configuration.IgniteConfiguration#getWorkDirectory() */ public static final String DFLT_DIR_PATH = "cp/sharedfs"; /** */ private static final String CODES = "0123456789QWERTYUIOPASDFGHJKLZXCVBNM"; /** */ private static final int CODES_LEN = CODES.length(); /** Grid logger. */ @LoggerResource private IgniteLogger log; /** Ignite instance. */ @IgniteInstanceResource private Ignite ignite; /** List of checkpoint directories where all files are stored. */ private Queue<String> dirPaths = new LinkedList<>(); /** Current folder where all checkpoints are saved. */ private String curDirPath = DFLT_DIR_PATH; /** * Either {@link #curDirPath} value if it is absolute * path or @{GRID_GAIN_HOME}/{@link #curDirPath} if one above was not found. */ private File folder; /** Local host name. */ private String host; /** Ignite instance name. */ private String igniteInstanceName; /** Task that takes care about outdated files. */ private SharedFsTimeoutTask timeoutTask; /** Listener. */ private CheckpointListener lsnr; /** Marshaller. */ private Marshaller marsh; /** * Initializes default directory paths. */ public SharedFsCheckpointSpi() { dirPaths.offer(DFLT_DIR_PATH); } /** * Gets collection of all configured paths where checkpoints can be saved. * * @return Collection of all configured paths. */ public Collection<String> getDirectoryPaths() { return dirPaths; } /** * Gets path to the directory where all checkpoints are saved. * * @return Path to the checkpoints directory. */ public String getCurrentDirectoryPath() { return curDirPath; } /** * Sets path to a shared directory where checkpoints will be stored. The * path can either be absolute or relative to {@code IGNITE_HOME} system * or environment variable. * <p> * If not provided, default value is {@link #DFLT_DIR_PATH}. * * @param dirPaths Absolute or Ignite installation home folder relative path where checkpoints * will be stored. * @return {@code this} for chaining. */ @IgniteSpiConfiguration(optional = true) public SharedFsCheckpointSpi setDirectoryPaths(Collection<String> dirPaths) { A.ensure(!F.isEmpty(dirPaths), "!F.isEmpty(dirPaths)"); this.dirPaths.clear(); this.dirPaths.addAll(dirPaths); return this; } /** {@inheritDoc} */ @Override public void spiStart(String igniteInstanceName) throws IgniteSpiException { // Start SPI start stopwatch. startStopwatch(); assertParameter(!F.isEmpty(dirPaths), "!F.isEmpty(dirPaths)"); this.igniteInstanceName = igniteInstanceName; if (ignite.configuration().getMarshaller() instanceof BinaryMarshaller) marsh = MarshallerUtils.jdkMarshaller(ignite.name()); else marsh = ignite.configuration().getMarshaller(); folder = getNextSharedPath(); if (folder == null) throw new IgniteSpiException("Failed to create checkpoint directory."); if (!folder.isDirectory()) throw new IgniteSpiException("Checkpoint directory path is not a valid directory: " + curDirPath); registerMBean(igniteInstanceName, new SharedFsCheckpointSpiMBeanImpl(this), SharedFsCheckpointSpiMBean.class); // Ack parameters. if (log.isDebugEnabled()) { log.debug(configInfo("folder", folder)); log.debug(configInfo("dirPaths", dirPaths)); } try { host = U.getLocalHost().getHostName(); } catch (IOException e) { throw new IgniteSpiException("Failed to get localhost address.", e); } // Ack ok start. if (log.isDebugEnabled()) log.debug(startInfo()); } /** {@inheritDoc} */ @Override public void spiStop() throws IgniteSpiException { if (timeoutTask != null) { U.interrupt(timeoutTask); U.join(timeoutTask, log); } unregisterMBean(); // Clean resources. folder = null; host = null; // Ack ok stop. if (log.isDebugEnabled()) log.debug(stopInfo()); } /** * Gets next available shared path if possible or {@code null}. * * @return File object represented shared directory. * @throws org.apache.ignite.spi.IgniteSpiException Throws if initializing has filed. */ @Nullable private File getNextSharedPath() throws IgniteSpiException { if (folder != null) { folder = null; dirPaths.poll(); } if (timeoutTask != null) { U.interrupt(timeoutTask); U.join(timeoutTask, log); } while (!dirPaths.isEmpty()) { curDirPath = dirPaths.peek(); if (new File(curDirPath).exists()) folder = new File(curDirPath); else { try { folder = U.resolveWorkDirectory(ignite.configuration().getWorkDirectory(), curDirPath, false); } catch (IgniteCheckedException e) { if (log.isDebugEnabled()) log.debug("Failed to resolve directory [path=" + curDirPath + ", exception=" + e.getMessage() + ']'); // Remove failed directory. dirPaths.poll(); // Select next shared directory if exists, otherwise throw exception. if (!dirPaths.isEmpty()) continue; else throw new IgniteSpiException("Failed to resolve directory: " + curDirPath + ']', e); } if (log.isDebugEnabled()) log.debug("Created shared filesystem checkpoint directory: " + folder.getAbsolutePath()); } break; } if (folder != null) { Map<File, SharedFsTimeData> files = new HashMap<>(); // Track expiration for only those files that are made by this node // to avoid file access conflicts. for (File file : getFiles()) { if (file.exists()) { if (log.isDebugEnabled()) log.debug("Checking checkpoint file: " + file.getAbsolutePath()); try { SharedFsCheckpointData data = SharedFsUtils.read(file, marsh, log); if (data.getHost().equals(host)) { files.put(file, new SharedFsTimeData(data.getExpireTime(), file.lastModified(), data.getKey())); if (log.isDebugEnabled()) log.debug("Registered existing checkpoint from: " + file.getAbsolutePath()); } } catch (IgniteCheckedException e) { U.error(log, "Failed to unmarshal objects in checkpoint file (ignoring): " + file.getAbsolutePath(), e); } catch (IOException e) { U.error(log, "IO error reading checkpoint file (ignoring): " + file.getAbsolutePath(), e); } } } timeoutTask = new SharedFsTimeoutTask(igniteInstanceName, marsh, log); timeoutTask.setCheckpointListener(lsnr); timeoutTask.add(files); timeoutTask.start(); } return folder; } /** * Returns new file name for the given key. Since fine name is based on the key, * the key must be unique. This method converts string key into hexadecimal-based * string to avoid conflicts of special characters in file names. * * @param key Unique checkpoint key. * @return Unique checkpoint file name. */ private String getUniqueFileName(CharSequence key) { assert key != null; SB sb = new SB(); // To be overly safe we'll limit file name size // to 128 characters (124 characters name + 4 character extension). // We also limit file name to upper case only to avoid surprising // behavior between Windows and Unix file systems. for (int i = 0; i < key.length() && i < 124; i++) sb.a(CODES.charAt(key.charAt(i) % CODES_LEN)); return sb.a(".gcp").toString(); } /** {@inheritDoc} */ @Override public byte[] loadCheckpoint(String key) throws IgniteSpiException { assert key != null; File file = new File(folder, getUniqueFileName(key)); if (file.exists()) try { SharedFsCheckpointData data = SharedFsUtils.read(file, marsh, log); return data != null ? data.getExpireTime() == 0 || data.getExpireTime() > U.currentTimeMillis() ? data.getState() : null : null; } catch (IgniteCheckedException e) { throw new IgniteSpiException("Failed to unmarshal objects in checkpoint file: " + file.getAbsolutePath(), e); } catch (IOException e) { throw new IgniteSpiException("Failed to read checkpoint file: " + file.getAbsolutePath(), e); } return null; } /** {@inheritDoc} */ @Override public boolean saveCheckpoint(String key, byte[] state, long timeout, boolean overwrite) throws IgniteSpiException { assert key != null; long expireTime = 0; if (timeout > 0) { expireTime = U.currentTimeMillis() + timeout; if (expireTime < 0) expireTime = Long.MAX_VALUE; } boolean saved = false; while (!saved) { File file = new File(folder, getUniqueFileName(key)); if (file.exists()) { if (!overwrite) return false; if (log.isDebugEnabled()) log.debug("Overriding existing file: " + file.getAbsolutePath()); } try { SharedFsUtils.write(file, new SharedFsCheckpointData(state, expireTime, host, key), marsh, log); } catch (IOException e) { // Select next shared directory if exists, otherwise throw exception if (getNextSharedPath() != null) continue; else throw new IgniteSpiException("Failed to write checkpoint data into file: " + file.getAbsolutePath(), e); } catch (IgniteCheckedException e) { throw new IgniteSpiException("Failed to marshal checkpoint data into file: " + file.getAbsolutePath(), e); } if (timeout > 0) timeoutTask.add(file, new SharedFsTimeData(expireTime, file.lastModified(), key)); saved = true; } return true; } /** * Returns list of files in checkpoint directory. * All sub-directories and their files are skipped. * * @return Array of open file descriptors. */ private File[] getFiles() { assert folder != null; return folder.listFiles(new FileFilter() { @Override public boolean accept(File pathName) { return !pathName.isDirectory(); } }); } /** {@inheritDoc} */ @Override public boolean removeCheckpoint(String key) { assert key != null; File file = new File(folder, getUniqueFileName(key)); if (timeoutTask != null) timeoutTask.remove(file); boolean rmv = file.delete(); if (rmv) { CheckpointListener lsnr = this.lsnr; if (lsnr != null) lsnr.onCheckpointRemoved(key); } return rmv; } /** {@inheritDoc} */ @Override public void setCheckpointListener(CheckpointListener lsnr) { this.lsnr = lsnr; if (timeoutTask != null) timeoutTask.setCheckpointListener(lsnr); } /** {@inheritDoc} */ @Override public SharedFsCheckpointSpi setName(String name) { super.setName(name); return this; } /** {@inheritDoc} */ @Override public String toString() { return S.toString(SharedFsCheckpointSpi.class, this); } /** * MBean implementation for SharedFsCheckpointSpi. */ private class SharedFsCheckpointSpiMBeanImpl extends IgniteSpiMBeanAdapter implements SharedFsCheckpointSpiMBean { /** {@inheritDoc} */ SharedFsCheckpointSpiMBeanImpl(IgniteSpiAdapter spiAdapter) { super(spiAdapter); } /** {@inheritDoc} */ @Override public Collection<String> getDirectoryPaths() { return SharedFsCheckpointSpi.this.getDirectoryPaths(); } /** {@inheritDoc} */ @Override public String getCurrentDirectoryPath() { return SharedFsCheckpointSpi.this.getCurrentDirectoryPath(); } } }