/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.cassandra.db; import java.io.File; import java.io.FilenameFilter; import java.io.IOException; import java.io.UnsupportedEncodingException; import java.io.IOError; import org.apache.cassandra.utils.FBUtilities; import org.apache.log4j.Logger; import org.apache.cassandra.service.StorageService; import org.apache.cassandra.dht.Token; import org.apache.cassandra.dht.IPartitioner; import org.apache.cassandra.db.filter.QueryPath; import org.apache.cassandra.db.filter.QueryFilter; import org.apache.cassandra.db.filter.NamesQueryFilter; import org.apache.cassandra.db.marshal.BytesType; import org.apache.cassandra.config.DatabaseDescriptor; import java.net.InetAddress; import java.util.SortedSet; import java.util.TreeSet; import java.util.concurrent.ExecutionException; public class SystemTable { private static Logger logger = Logger.getLogger(SystemTable.class); public static final String STATUS_CF = "LocationInfo"; // keep the old CF string for backwards-compatibility private static final String LOCATION_KEY = "L"; private static final String BOOTSTRAP_KEY = "Bootstrap"; private static final byte[] BOOTSTRAP = utf8("B"); private static final byte[] TOKEN = utf8("Token"); private static final byte[] GENERATION = utf8("Generation"); private static final byte[] CLUSTERNAME = utf8("ClusterName"); private static final byte[] PARTITIONER = utf8("Partioner"); private static StorageMetadata metadata; private static byte[] utf8(String str) { try { return str.getBytes("UTF-8"); } catch (UnsupportedEncodingException e) { throw new RuntimeException(e); } } /** * Record token being used by another node */ public static synchronized void updateToken(InetAddress ep, Token token) { IPartitioner p = StorageService.getPartitioner(); ColumnFamily cf = ColumnFamily.create(Table.SYSTEM_TABLE, STATUS_CF); cf.addColumn(new Column(ep.getAddress(), p.getTokenFactory().toByteArray(token), System.currentTimeMillis())); RowMutation rm = new RowMutation(Table.SYSTEM_TABLE, LOCATION_KEY); rm.add(cf); try { rm.apply(); } catch (IOException e) { throw new IOError(e); } } /** * This method is used to update the System Table with the new token for this node */ public static synchronized void updateToken(Token token) { assert metadata != null; IPartitioner p = StorageService.getPartitioner(); ColumnFamily cf = ColumnFamily.create(Table.SYSTEM_TABLE, STATUS_CF); cf.addColumn(new Column(SystemTable.TOKEN, p.getTokenFactory().toByteArray(token), System.currentTimeMillis())); RowMutation rm = new RowMutation(Table.SYSTEM_TABLE, LOCATION_KEY); rm.add(cf); try { rm.apply(); } catch (IOException e) { throw new IOError(e); } metadata.setToken(token); } /** * One of three things will happen if you try to read the system table: * 1. files are present and you can read them: great * 2. no files are there: great (new node is assumed) * 3. files are present but you can't read them: bad (suspect that the partitioner was changed). * @throws IOException */ public static void checkHealth() throws IOException { Table table = null; try { table = Table.open(Table.SYSTEM_TABLE); } catch (AssertionError err) { // this happens when a user switches from OPP to RP. IOException ex = new IOException("Could not read system table. Did you change partitioners?"); ex.initCause(err); throw ex; } SortedSet<byte[]> cols = new TreeSet<byte[]>(new BytesType()); cols.add(TOKEN); cols.add(GENERATION); cols.add(PARTITIONER); QueryFilter filter = new NamesQueryFilter(LOCATION_KEY, new QueryPath(STATUS_CF), cols); ColumnFamily cf = table.getColumnFamilyStore(STATUS_CF).getColumnFamily(filter); if (cf == null) { // this is either a brand new node (there will be no files), or the partitioner was changed from RP to OPP. for (String path : DatabaseDescriptor.getAllDataFileLocationsForTable("system")) { File[] dbContents = new File(path).listFiles(new FilenameFilter() { public boolean accept(File dir, String name) { return name.endsWith(".db"); } }); if (dbContents.length > 0) throw new IOException("Found system table files, but they couldn't be loaded. Did you change the partitioner?"); } // no system files. data is either in the commit log or this is a new node. return; } // token and generation should *always* be there. If either are missing, we can assume that the partitioner has // been switched. if (cf.getColumnCount() > 0 && (cf.getColumn(GENERATION) == null || cf.getColumn(TOKEN) == null)) throw new IOException("Couldn't read system generation or token. Did you change the partitioner?"); IColumn partitionerCol = cf.getColumn(PARTITIONER); if (partitionerCol != null && !DatabaseDescriptor.getPartitioner().getClass().getName().equals(new String(partitionerCol.value(), "UTF-8"))) throw new IOException("Detected partitioner mismatch! Did you change the partitioner?"); if (partitionerCol == null) logger.info("Did not see a partitioner in system storage."); } /* * This method reads the system table and retrieves the metadata * associated with this storage instance. Currently we store the * metadata in a Column Family called LocatioInfo which has two * columns namely "Token" and "Generation". This is the token that * gets gossiped around and the generation info is used for FD. * We also store whether we're in bootstrap mode in a third column */ public static synchronized StorageMetadata initMetadata() throws IOException { if (metadata != null) // guard to protect against being called twice return metadata; /* Read the system table to retrieve the storage ID and the generation */ Table table = Table.open(Table.SYSTEM_TABLE); SortedSet<byte[]> columns = new TreeSet<byte[]>(new BytesType()); columns.add(TOKEN); columns.add(GENERATION); columns.add(CLUSTERNAME); QueryFilter filter = new NamesQueryFilter(LOCATION_KEY, new QueryPath(STATUS_CF), columns); ColumnFamily cf = table.getColumnFamilyStore(STATUS_CF).getColumnFamily(filter); String partitioner = DatabaseDescriptor.getPartitioner().getClass().getName(); IPartitioner p = StorageService.getPartitioner(); if (cf == null) { Token token; String initialToken = DatabaseDescriptor.getInitialToken(); if (initialToken == null) token = p.getRandomToken(); else token = p.getTokenFactory().fromString(initialToken); logger.info("Saved Token not found. Using " + token); // seconds-since-epoch isn't a foolproof new generation // (where foolproof is "guaranteed to be larger than the last one seen at this ip address"), // but it's as close as sanely possible int generation = (int) (System.currentTimeMillis() / 1000); logger.info("Saved ClusterName not found. Using " + DatabaseDescriptor.getClusterName()); RowMutation rm = new RowMutation(Table.SYSTEM_TABLE, LOCATION_KEY); cf = ColumnFamily.create(Table.SYSTEM_TABLE, SystemTable.STATUS_CF); cf.addColumn(new Column(TOKEN, p.getTokenFactory().toByteArray(token))); cf.addColumn(new Column(GENERATION, FBUtilities.toByteArray(generation))); cf.addColumn(new Column(CLUSTERNAME, DatabaseDescriptor.getClusterName().getBytes())); cf.addColumn(new Column(PARTITIONER, partitioner.getBytes("UTF-8"))); rm.add(cf); rm.apply(); try { table.getColumnFamilyStore(SystemTable.STATUS_CF).forceBlockingFlush(); } catch (ExecutionException e) { throw new RuntimeException(e); } catch (InterruptedException e) { throw new RuntimeException(e); } metadata = new StorageMetadata(token, generation, DatabaseDescriptor.getClusterName().getBytes()); return metadata; } if (cf.getColumnCount() < 2) throw new RuntimeException("Expected both token and generation columns; found " + cf); /* we crashed and came back up: make sure new generation is greater than old */ IColumn tokenColumn = cf.getColumn(TOKEN); assert tokenColumn != null : cf; Token token = p.getTokenFactory().fromByteArray(tokenColumn.value()); logger.info("Saved Token found: " + token); IColumn generation = cf.getColumn(GENERATION); assert generation != null : cf; int gen = Math.max(FBUtilities.byteArrayToInt(generation.value()) + 1, (int) (System.currentTimeMillis() / 1000)); IColumn cluster = cf.getColumn(CLUSTERNAME); IColumn partitionerColumn = cf.getColumn(PARTITIONER); RowMutation rm = new RowMutation(Table.SYSTEM_TABLE, LOCATION_KEY); cf = ColumnFamily.create(Table.SYSTEM_TABLE, SystemTable.STATUS_CF); Column generation2 = new Column(GENERATION, FBUtilities.toByteArray(gen), generation.timestamp() + 1); cf.addColumn(generation2); byte[] cname; if (cluster != null) { logger.info("Saved ClusterName found: " + new String(cluster.value())); cname = cluster.value(); } else { Column clustername = new Column(CLUSTERNAME, DatabaseDescriptor.getClusterName().getBytes()); cf.addColumn(clustername); cname = DatabaseDescriptor.getClusterName().getBytes(); logger.info("Saved ClusterName not found. Using " + DatabaseDescriptor.getClusterName()); } if (partitionerColumn == null) { Column c = new Column(PARTITIONER, partitioner.getBytes("UTF-8")); cf.addColumn(c); logger.info("Saved partitioner not found. Using " + partitioner); } rm.add(cf); rm.apply(); try { table.getColumnFamilyStore(SystemTable.STATUS_CF).forceBlockingFlush(); } catch (ExecutionException e) { throw new RuntimeException(e); } catch (InterruptedException e) { throw new RuntimeException(e); } metadata = new StorageMetadata(token, gen, cname); return metadata; } public static boolean isBootstrapped() { Table table = null; try { table = Table.open(Table.SYSTEM_TABLE); QueryFilter filter = new NamesQueryFilter(BOOTSTRAP_KEY, new QueryPath(STATUS_CF), BOOTSTRAP); ColumnFamily cf = table.getColumnFamilyStore(STATUS_CF).getColumnFamily(filter); return cf != null && cf.getColumn(BOOTSTRAP).value()[0] == 1; } catch (IOException e) { throw new RuntimeException(e); } } public static void setBootstrapped(boolean isBootstrapped) { ColumnFamily cf = ColumnFamily.create(Table.SYSTEM_TABLE, STATUS_CF); cf.addColumn(new Column(BOOTSTRAP, new byte[] { (byte) (isBootstrapped ? 1 : 0) }, System.currentTimeMillis())); RowMutation rm = new RowMutation(Table.SYSTEM_TABLE, BOOTSTRAP_KEY); rm.add(cf); try { rm.apply(); } catch (IOException e) { throw new RuntimeException(e); } } public static class StorageMetadata { private Token token; private int generation; private byte[] cluster; StorageMetadata(Token storageId, int generation, byte[] clustername) { token = storageId; this.generation = generation; cluster = clustername; } public Token getToken() { return token; } public void setToken(Token storageId) { token = storageId; } public int getGeneration() { return generation; } public byte[] getClusterName() { return cluster; } } }