/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.streaming.connectors.cassandra;

import com.datastax.driver.core.Cluster;
import com.datastax.driver.core.Row;
import com.datastax.driver.core.Session;
import org.apache.flink.api.java.ClosureCleaner;
import org.apache.flink.streaming.runtime.operators.CheckpointCommitter;

import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;

/**
 * CheckpointCommitter that saves information about completed checkpoints within a separate table in a cassandra
 * database.
 *
 * <p>Entries are in the form |operator_id | subtask_id | last_completed_checkpoint|
 */
public class CassandraCommitter extends CheckpointCommitter {

	private static final long serialVersionUID = 1L;

	private final ClusterBuilder builder;
	private transient Cluster cluster;
	private transient Session session;

	private String keySpace = "flink_auxiliary";
	private String table = "checkpoints_";

	/**
	 * A cache of the last committed checkpoint ids per subtask index. This is used to
	 * avoid redundant round-trips to Cassandra (see {@link #isCheckpointCommitted(int, long)}).
	 */
	private final Map<Integer, Long> lastCommittedCheckpoints = new HashMap<>();

	/**
	 * Creates a committer that stores its checkpoint table in the default key space.
	 *
	 * @param builder builder used to obtain the {@link Cluster} for every connection this committer opens
	 */
	public CassandraCommitter(ClusterBuilder builder) {
		this.builder = builder;
		ClosureCleaner.clean(builder, true);
	}

	/**
	 * Creates a committer that stores its checkpoint table in the given key space.
	 *
	 * @param builder builder used to obtain the {@link Cluster} for every connection this committer opens
	 * @param keySpace name of the key space that holds the checkpoint table
	 */
	public CassandraCommitter(ClusterBuilder builder, String keySpace) {
		this(builder);
		this.keySpace = keySpace;
	}

	/**
	 * Internally used to set the job ID after instantiation.
	 *
	 * <p>The id is appended to the checkpoint table name, so every call extends the name further.
	 *
	 * @param id job id to pass to the superclass and to append to the table name
	 * @throws Exception if the superclass rejects the id
	 */
	@Override
	public void setJobId(String id) throws Exception {
		super.setJobId(id);
		table += id;
	}

	/**
	 * Generates the necessary key space and table to store information.
	 *
	 * <p>The connection opened for this purpose is closed again before the method returns,
	 * regardless of whether the statements succeeded.
	 *
	 * @throws Exception if the cluster cannot be reached or one of the statements fails
	 */
	@Override
	public void createResource() throws Exception {
		cluster = builder.getCluster();
		try {
			session = cluster.connect();

			session.execute(String.format("CREATE KEYSPACE IF NOT EXISTS %s with replication={'class':'SimpleStrategy', 'replication_factor':1};", keySpace));
			session.execute(String.format("CREATE TABLE IF NOT EXISTS %s.%s (sink_id text, sub_id int, checkpoint_id bigint, PRIMARY KEY (sink_id, sub_id));", keySpace, table));
		} finally {
			// Release the connection even if connecting or a statement failed.
			closeConnection();
		}
	}

	/**
	 * Opens the connection that is used to commit and look up checkpoints.
	 *
	 * @throws Exception if no builder was set or the connection cannot be established
	 */
	@Override
	public void open() throws Exception {
		if (builder == null) {
			throw new RuntimeException("No ClusterBuilder was set.");
		}
		cluster = builder.getCluster();
		try {
			session = cluster.connect();
		} catch (Exception e) {
			// Do not leave a half-initialized cluster behind if no session could be created.
			try {
				cluster.close();
			} catch (Exception closeFailure) {
				LOG.error("Error while closing cluster.", closeFailure);
			}
			throw e;
		}
	}

	/**
	 * Clears the checkpoint cache and closes the session and the cluster.
	 *
	 * <p>Failures while closing are logged and not propagated.
	 */
	@Override
	public void close() throws Exception {
		this.lastCommittedCheckpoints.clear();
		closeConnection();
	}

	/**
	 * Stores the given checkpoint id as the last committed one for the subtask, both in
	 * Cassandra and in the local cache.
	 *
	 * @param subtaskIdx index of the subtask that committed the checkpoint
	 * @param checkpointId id of the committed checkpoint
	 */
	@Override
	public void commitCheckpoint(int subtaskIdx, long checkpointId) {
		String statement = String.format(
			"UPDATE %s.%s set checkpoint_id=%d where sink_id='%s' and sub_id=%d;",
			keySpace, table, checkpointId, operatorId, subtaskIdx);

		session.execute(statement);
		// Only cache the id once the write went through.
		lastCommittedCheckpoints.put(subtaskIdx, checkpointId);
	}

	/**
	 * Checks whether the given checkpoint was already committed by the given subtask.
	 *
	 * @param subtaskIdx index of the subtask to check
	 * @param checkpointId id of the checkpoint to check
	 * @return {@code true} if a last committed checkpoint id is known for the subtask and
	 *         {@code checkpointId} is not greater than it, {@code false} otherwise
	 */
	@Override
	public boolean isCheckpointCommitted(int subtaskIdx, long checkpointId) {
		// Pending checkpointed buffers are committed in ascending order of their
		// checkpoint id. This way we can tell if a checkpointed buffer was committed
		// just by asking the third-party storage system for the last checkpoint id
		// committed by the specified subtask.

		Long lastCommittedCheckpoint = lastCommittedCheckpoints.get(subtaskIdx);
		if (lastCommittedCheckpoint == null) {
			// Cache miss: look the id up in Cassandra and remember it for later calls.
			String statement = String.format(
				"SELECT checkpoint_id FROM %s.%s where sink_id='%s' and sub_id=%d;",
				keySpace, table, operatorId, subtaskIdx);

			Iterator<Row> resultIt = session.execute(statement).iterator();
			if (resultIt.hasNext()) {
				lastCommittedCheckpoint = resultIt.next().getLong("checkpoint_id");
				lastCommittedCheckpoints.put(subtaskIdx, lastCommittedCheckpoint);
			}
		}
		return lastCommittedCheckpoint != null && checkpointId <= lastCommittedCheckpoint;
	}

	/**
	 * Closes the session and then the cluster, skipping whichever was never created.
	 * Failures are logged and not propagated so that both close attempts are always made.
	 */
	private void closeConnection() {
		if (session != null) {
			try {
				session.close();
			} catch (Exception e) {
				LOG.error("Error while closing session.", e);
			}
		}
		if (cluster != null) {
			try {
				cluster.close();
			} catch (Exception e) {
				LOG.error("Error while closing cluster.", e);
			}
		}
	}
}