/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.streaming.runtime.operators;
import java.io.Serializable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* This class is used to save information about which sink operator instance has committed checkpoints to a backend.
* <p>
* The current checkpointing mechanism is ill-suited for sinks relying on backends that do not support roll-backs.
* When dealing with such a system, while trying to get exactly-once semantics, one may neither commit data while
* creating the snapshot (since another sink instance may fail, leading to a replay on the same data) nor when receiving
* a checkpoint-complete notification (since a subsequent failure would leave us with no knowledge as to whether data
* was committed or not).
* <p>
* A CheckpointCommitter can be used to solve the second problem by saving whether an instance committed all data
* belonging to a checkpoint. This data must be stored in a backend that is persistent across retries (which rules
* out Flink's state mechanism) and accessible from all machines, like a database or distributed file.
* <p>
* There is no mandate as to how the resource is shared; there may be one resource for all Flink jobs, or one for
* each job/operator/-instance separately. This implies that the resource must not be cleaned up by the system itself,
* and as such should be kept as small as possible.
*/
public abstract class CheckpointCommitter implements Serializable {

    /** Logger shared with subclasses. */
    protected static final Logger LOG = LoggerFactory.getLogger(CheckpointCommitter.class);

    /** ID of the job, as supplied through {@link #setJobId(String)}. */
    protected String jobId;

    /** ID of the operator, as supplied through {@link #setOperatorId(String)}. */
    protected String operatorId;

    /**
     * Stores the job ID. Used internally, after the committer has been instantiated.
     *
     * @param id the job ID to remember
     * @throws Exception never thrown by this base implementation; declared so that overriding
     *                   implementations may fail
     */
    public void setJobId(String id) throws Exception {
        jobId = id;
    }

    /**
     * Stores the operator ID. Used internally, after the committer has been instantiated.
     *
     * @param id the operator ID to remember
     * @throws Exception never thrown by this base implementation; declared so that overriding
     *                   implementations may fail
     */
    public void setOperatorId(String id) throws Exception {
        operatorId = id;
    }

    /**
     * Creates/opens/connects to the resource in which the commit information is stored. Called once,
     * directly after instantiation.
     *
     * @throws Exception if the implementation fails to set up the resource
     */
    public abstract void createResource() throws Exception;

    /**
     * Opens the resource or connects to it, possibly creating it first.
     *
     * @throws Exception if the implementation fails to open the resource
     */
    public abstract void open() throws Exception;

    /**
     * Closes the resource, or the connection to it. As a rule the resource itself should still exist
     * once this method has returned.
     *
     * @throws Exception if the implementation fails to close the resource
     */
    public abstract void close() throws Exception;

    /**
     * Records in the resource that the given checkpoint has been completed.
     *
     * @param subtaskIdx   the index of the subtask responsible for committing the checkpoint
     * @param checkpointID the ID of the checkpoint to be committed
     * @throws Exception if the implementation fails to record the commit
     */
    public abstract void commitCheckpoint(int subtaskIdx, long checkpointID) throws Exception;

    /**
     * Checks the resource to see whether the given checkpoint was committed completely.
     *
     * @param subtaskIdx   the index of the subtask responsible for committing the checkpoint
     * @param checkpointID the ID of the checkpoint we are interested in
     * @return {@code true} if the checkpoint was committed completely, {@code false} otherwise
     * @throws Exception if the implementation fails to query the resource
     */
    public abstract boolean isCheckpointCommitted(int subtaskIdx, long checkpointID) throws Exception;
}