/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.raid; import java.io.IOException; import java.sql.Connection; import java.sql.PreparedStatement; import java.sql.ResultSet; import java.sql.SQLException; import java.util.ArrayList; import java.util.List; import java.util.Random; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.hdfs.protocol.Block; import org.apache.hadoop.raid.DBConnectionFactory; import org.apache.hadoop.raid.DBUtils; /** * Store the relationships between blocks and stripes into database */ public class DBStripeStore extends StripeStore { public static final Log LOG = LogFactory.getLog(DBStripeStore.class); public static final String DB_TABLE_NAME = "hdfs.raid.stripe.db.table"; // sleep time between two put stripe operations public static final String DB_PUT_STRIPE_SLEEP_TIME_KEY = "hdfs.raid.stripe.db.sleep"; public static final long DEFAULT_DB_PUT_STRIPE_SLEEP_TIME = 2000; public static final String[] STRIPESTORE_SPECIFIC_KEYS = new String[] { DB_TABLE_NAME }; private DBConnectionFactory connectionFactory; private String tblName; private long putStripeSleepTime = DEFAULT_DB_PUT_STRIPE_SLEEP_TIME; private int sqlNumRetries = DBUtils.DEFAULT_DB_MAX_RETRY; private String defaultUrl = null; // SQL Queries private String VALUES_STRING = "(?, ?, ?, ?, ?)"; private String CREATE_TABLE_SQL; private String DROP_TABLE_SQL; private String SELECT_WHERE_SQL; private String SELECT_COUNT_SQL; private String REPLACE_SQL; private String CLEAR_SQL; // stripe id related public String NEW_STRIPE_ID_SQL; public void constructSql() { CREATE_TABLE_SQL = "CREATE TABLE IF NOT EXISTS " + tblName + "(auto_id BIGINT NOT NULL AUTO_INCREMENT," + " block_id BIGINT NOT NULL," + " gen_stamp BIGINT NOT NULL," + " codec_id BIGINT NOT NULL," + " block_order TINYINT NOT NULL," + " stripe_id BIGINT NOT NULL," + " INDEX USING BTREE(block_id)," + " INDEX USING BTREE(stripe_id)," + " PRIMARY KEY(auto_id)," + " UNIQUE KEY `block` (block_id, gen_stamp, codec_id));"; DROP_TABLE_SQL = "DROP TABLE " + tblName; SELECT_WHERE_SQL = "SELECT a.block_id, a.gen_stamp, a.block_order FROM " + tblName + " AS a, " + tblName + " AS b WHERE " + " a.stripe_id = b.stripe_id AND b.block_id = ? " + " AND b.gen_stamp = ? AND b.codec_id = ? " + " order by a.block_order"; SELECT_COUNT_SQL = "SELECT COUNT(DISTINCT stripe_id) FROM " + tblName; REPLACE_SQL = "REPLACE INTO " + tblName + " (block_id, gen_stamp, codec_id, block_order, stripe_id)" + " VALUES "; NEW_STRIPE_ID_SQL = "REPLACE INTO " + tblName + "(block_id, gen_stamp," + " codec_id, block_order, stripe_id) values (?, ?, ?, ?, ?)"; CLEAR_SQL = "TRUNCATE TABLE " + tblName; } private void createTable() throws IOException { DBUtils.runInsert(connectionFactory, CREATE_TABLE_SQL, DBUtils.EMPTY_SQL_PARAMS, sqlNumRetries); LOG.info("Created a table " + tblName); } @Override public void initialize(Configuration conf, boolean createStore, FileSystem fs) throws IOException { connectionFactory = DBUtils.getDBConnectionFactory(conf); Configuration newConf = initializeConf(STRIPESTORE_SPECIFIC_KEYS, conf, fs); tblName = newConf.get(DB_TABLE_NAME); if (tblName == null) { throw new IOException("Config key " + DB_TABLE_NAME + " is not defined"); } constructSql(); sqlNumRetries = DBUtils.getSqlNumRetry(conf); if (createStore) { createTable(); } putStripeSleepTime = conf.getLong(DB_PUT_STRIPE_SLEEP_TIME_KEY, DEFAULT_DB_PUT_STRIPE_SLEEP_TIME); } @Override public int numStripes() throws IOException { Long count = DBUtils.selectCount(connectionFactory, SELECT_COUNT_SQL, DBUtils.EMPTY_SQL_PARAMS, sqlNumRetries, tblName); return count.intValue(); } @Override public StripeInfo getStripe(Codec codec, Block block) throws IOException { List<Object> sqlParams = new ArrayList<Object>(); sqlParams.add(Long.toString(block.getBlockId())); sqlParams.add(Long.toString(block.getGenerationStamp())); sqlParams.add(Long.toString(codec.id.hashCode())); List<List<Object>> results = DBUtils.runInsertSelect(connectionFactory, SELECT_WHERE_SQL, sqlParams, true, sqlNumRetries, DBUtils.RETRY_MAX_INTERVAL_SEC, false, false); if (results == null) { throw new IOException("You cannot select from " + tblName); } if (results.isEmpty()) { //No record is found return null; } // block_order of the first result should be negative number, its // absolute value is the order of the last block Block lastBlock = new Block((Long)results.get(0).get(0), 0L, (Long)results.get(0).get(1)); Integer lastBlockOrder = (Integer)results.get(0).get(2); if (lastBlockOrder >= 0 || (-lastBlockOrder != results.size() - 1) || results.size() <= codec.parityLength) { throw new IOException("Wrong result is returned for codec " + codec.id + " last block: " + lastBlock + " last block order: " + lastBlockOrder); } List<Block> parityBlocks = new ArrayList<Block>(); List<Block> sourceBlocks = new ArrayList<Block>(); for (int i = 1; i < results.size(); i++) { Block curBlock = new Block((Long)results.get(i).get(0), 0L, (Long)results.get(i).get(1)); Integer curBlockOrder = (Integer)results.get(i).get(2); if (curBlockOrder != i - 1) { throw new IOException("The " + (i-1) + "th block " + curBlock + "'s order is " + curBlockOrder); } if (i - 1 < codec.parityLength) { parityBlocks.add(curBlock); } else { sourceBlocks.add(curBlock); } } // Finally add the last block; sourceBlocks.add(lastBlock); StripeInfo si = new StripeInfo(codec, block, parityBlocks, sourceBlocks); LOG.info("Fetch " + codec.id + ":" + block + " -> " + si); return si; } private String getInsertStripeSql(List<Block> parityBlks, List<Block> srcBlks) { StringBuilder sb = new StringBuilder(REPLACE_SQL); for (int j = 0; j < parityBlks.size() + srcBlks.size(); j++) { if (j > 0) { sb.append(","); } sb.append(VALUES_STRING); } return sb.toString(); } private List<Object> constructGetStripeSqlParam(Codec codec, List<Block> parityBlks, List<Block> srcBlks) { List<Object> sqlParams = new ArrayList<Object>(); // insert just the first block int index = 0; sqlParams.add(Long.toString(parityBlks.get(index).getBlockId())); sqlParams.add(Long.toString(parityBlks.get(index).getGenerationStamp())); sqlParams.add(Long.toString(codec.id.hashCode())); sqlParams.add(Long.toString(index)); // temporary id will be overwritten later sqlParams.add(Long.toString(-1L)); return sqlParams; } private List<Object> constructInsertStripeSqlParam(Codec codec, List<Block> parityBlks, List<Block> srcBlks, Long stripeId) { List<Object> sqlParams = new ArrayList<Object>(); for (int i = 0; i < parityBlks.size(); i++) { sqlParams.add(Long.toString(parityBlks.get(i).getBlockId())); sqlParams.add(Long.toString(parityBlks.get(i).getGenerationStamp())); sqlParams.add(Long.toString(codec.id.hashCode())); sqlParams.add(Long.toString(i)); sqlParams.add(Long.toString(stripeId)); } int srcBlksSize = srcBlks.size(); for (int i = 0; i < srcBlksSize; i++) { sqlParams.add(Long.toString(srcBlks.get(i).getBlockId())); sqlParams.add(Long.toString(srcBlks.get(i).getGenerationStamp())); sqlParams.add(Long.toString(codec.id.hashCode())); int blockOrder = (i == srcBlksSize - 1)? -(i + codec.parityLength): (i + codec.parityLength); sqlParams.add(Long.toString(blockOrder)); sqlParams.add(Long.toString(stripeId)); } return sqlParams; } @Override public void putStripe(Codec codec, List<Block> parityBlks, List<Block> srcBlks) throws IOException { if (parityBlks.size() != codec.parityLength) { throw new IOException("Number of parity blocks " + parityBlks.size() + " doesn't match codec " + codec.id + " (" + codec.parityLength + ")"); } if (srcBlks.size() > codec.stripeLength) { throw new IOException("Number of source blocks " + srcBlks.size() + " is greater than codec " + codec.id + " (" + codec.stripeLength + ")"); } List<Object> getStripeSqlParams = constructGetStripeSqlParam(codec, parityBlks, srcBlks); String insertStripeSql = getInsertStripeSql(parityBlks, srcBlks); int waitMS = 3000; // wait for at least 3sec before next retry. Random rand = new Random(); for (int i = 0; i < sqlNumRetries; ++i) { Connection conn = null; PreparedStatement getStripeStatement = null; ResultSet generatedKeys = null; PreparedStatement insertStripeStatement = null; String url = null; try { try { url = connectionFactory.getUrl(true); } catch (IOException ioe) { LOG.warn("Cannot get DB URL, fall back to the default one:" + defaultUrl, ioe); url = defaultUrl; if (url == null) { throw ioe; } } LOG.info("Attepting connection with URL " + url); conn = connectionFactory.getConnection(url); conn.setAutoCommit(false); defaultUrl = url; getStripeStatement = DBUtils.getPreparedStatement(conn, NEW_STRIPE_ID_SQL, getStripeSqlParams, true); int recordsUpdated = getStripeStatement.executeUpdate(); LOG.info("rows inserted: " + recordsUpdated + " sql: " + NEW_STRIPE_ID_SQL); generatedKeys = getStripeStatement.getGeneratedKeys(); List<List<Object>> results = DBUtils.getResults(generatedKeys); Long stripeId = (Long)results.get(0).get(0); List<Object> insertStripeSqlParams = constructInsertStripeSqlParam(codec, parityBlks, srcBlks, stripeId); insertStripeStatement = DBUtils.getPreparedStatement(conn, insertStripeSql, insertStripeSqlParams, false); recordsUpdated = insertStripeStatement.executeUpdate(); conn.commit(); LOG.info("rows inserted: " + recordsUpdated + " sql: " + insertStripeSql); StripeInfo si = new StripeInfo(codec, null, parityBlks, srcBlks); LOG.info("Put " + si + " into stripe store"); Thread.sleep(putStripeSleepTime + rand.nextInt(1000)); return; } catch (Exception e) { // We should catch a better exception than Exception, but since // DBConnectionUrlFactory.getUrl() defines throws Exception, it's hard // for us to figure out the complete set it can throw. We follow // DBConnectionUrlFactory.getUrl()'s definition to catch Exception. // It shouldn't be a big problem as after numRetries, we anyway exit. LOG.info("Exception " + e + ". Will retry " + (sqlNumRetries - i) + " times."); // Introducing a random factor to the wait time before another retry. // The wait time is dependent on # of failures and a random factor. // At the first time of getting a SQLException, the wait time // is a random number between [0,300] msec. If the first retry // still fails, we will wait 300 msec grace period before the 2nd retry. // Also at the second retry, the waiting window is expanded to 600 msec // alleviating the request rate from the server. Similarly the 3rd retry // will wait 600 msec grace period before retry and the waiting window // is // expanded to 1200 msec. if (conn != null) { try { conn.rollback(); LOG.info("putStripe Transaction was rolled back"); } catch(SQLException excep) { LOG.error(excep); } } waitMS += waitMS; if (waitMS > DBUtils.RETRY_MAX_INTERVAL_SEC * 1000) { waitMS = DBUtils.RETRY_MAX_INTERVAL_SEC * 1000; } double waitTime = waitMS + waitMS * rand.nextDouble(); if (i + 1 == sqlNumRetries) { LOG.error("Still got Exception after " + sqlNumRetries + " retries.", e); throw new IOException(e); } try { Thread.sleep((long) waitTime); } catch (InterruptedException ie) { throw new IOException(ie); } } finally { try { if (conn != null) { conn.setAutoCommit(true); } } catch (SQLException sqlExp) { LOG.warn("Fail to set AutoCommit to true", sqlExp); } DBUtils.close(generatedKeys, new PreparedStatement[]{getStripeStatement, insertStripeStatement}, conn); } } } // Only used for testing @Override public void clear() throws IOException { DBUtils.runInsert(connectionFactory, CLEAR_SQL, DBUtils.EMPTY_SQL_PARAMS, sqlNumRetries); LOG.info("Clear all values from table " + tblName); } // Only used for testing public void dropTable() throws IOException { DBUtils.runInsert(connectionFactory, DROP_TABLE_SQL, DBUtils.EMPTY_SQL_PARAMS, sqlNumRetries); LOG.info("Drop table " + tblName); } public long getPutStripeSleepTime() { return putStripeSleepTime; } public DBConnectionFactory getConnectionFactory() { return connectionFactory; } }